Overview
In this assessment we aim to use the MACCDC conn data to perform data analysis and modelling. First we’ll import any libraries we intend to use.
#install.packages("dbscan")
#install.packages("cluster")
#install.packages("reshape")
#install.packages("ggplot2")
#install.packages("gridExtra")
#install.packages("Matrix")
#install.packages("irlba")
#install.packages("Rtsne")
#install.packages("umap")
#install.packages("uwot")
library(dbscan)
library(cluster)
library(reshape)
library(ggplot2)
library(gridExtra)
library(Matrix)
library(irlba)
library(Rtsne)
library(umap)
library(uwot)
We first must import the data.
# Read the connection log from "MAC.csv" in the working directory into a
# data frame and print it for a first look at the columns.
mydata <- data.frame(read.csv("MAC.csv"))
mydata
We first want to look for missing data. Service, duration, orig_bytes, resp_bytes and local_orig all seem to have missing data in them so we will see what percentage.
# Cross-tabulate "duration is NA" against protocol, then rescale every
# protocol column so it sums to 1 (share observed vs. share missing).
mtab0 <- table(data.frame(
  missingduration = is.na(mydata[["duration"]]),
  proto = mydata[["proto"]]
))
(apply(mtab0, 2, function(counts) counts / sum(counts)))
proto
missingduration icmp tcp udp
FALSE 0.8585338 0.1656118 0.3089144
TRUE 0.1414662 0.8343882 0.6910856
# Cross-tabulate "orig_bytes is NA" against protocol, then rescale every
# protocol column so it sums to 1 (share observed vs. share missing).
mtab1 <- table(data.frame(
  missing_orig_bytes = is.na(mydata[["orig_bytes"]]),
  proto = mydata[["proto"]]
))
(apply(mtab1, 2, function(counts) counts / sum(counts)))
proto
missing_orig_bytes icmp tcp udp
FALSE 0.8585338 0.1656118 0.3089144
TRUE 0.1414662 0.8343882 0.6910856
# Cross-tabulate "resp_bytes is NA" against protocol, then rescale every
# protocol column so it sums to 1 (share observed vs. share missing).
mtab2 <- table(data.frame(
  missing_resp_bytes = is.na(mydata[["resp_bytes"]]),
  proto = mydata[["proto"]]
))
(apply(mtab2, 2, function(counts) counts / sum(counts)))
proto
missing_resp_bytes icmp tcp udp
FALSE 0.8585338 0.1656118 0.3089144
TRUE 0.1414662 0.8343882 0.6910856
# Cross-tabulate "local_orig is NA" against protocol, then rescale every
# protocol column so it sums to 1 (share observed vs. share missing).
mtab3 <- table(data.frame(
  missing_local_orig = is.na(mydata[["local_orig"]]),
  proto = mydata[["proto"]]
))
(apply(mtab3, 2, function(counts) counts / sum(counts)))
icmp tcp udp
1 1 1
Thus we are missing the local_orig feature for every data point in the data set. We may then consider dropping this entire column as it serves no use to us and we cannot impute the data without prior knowledge of the data set and what it should look like. The duration, orig_bytes and resp_bytes all appear to be missing exactly the same data - on further analysis, we see that whenever one is missing, all three are missing.
Some initial data cleansing will come from removing the X column and the ts column. The X column is produced by the sampling and since we have a random sample of the data, the ts provides no real information on the data.
# Keep only the first row seen for each uid and print the result; comparing
# its row count with that of `mydata` shows whether any uid repeats.
unique_uid <- mydata[which(!duplicated(mydata[["uid"]])), ]
unique_uid
Thus all our uids are unique and therefore won’t provide us with any extra information either, since they will be uncorrelated with the rest of the data. This is the only column with this trait; all other columns have values which occur more than once, so we can drop the uid column too.
# Remove the columns named in `drop_columns` from `mydata` and preview
# what is left.
drop_columns <- c("X", "ts", "local_orig", "uid")
mydata <- mydata[, !(colnames(mydata) %in% drop_columns)]
head(mydata)
So we have removed the columns that didn’t provide us with any extra information. We will now extract the data we will use for DBSCAN to create clusters. The following code is pulled from Alex’s workbook and allows us to pull out 7 of the features to use for DBSCAN and ensures all elements are numeric.
# miss.me <- vector(length = nrow(mydata))
# miss.me <- rep(0, times = nrow(mydata))
# for(i in 1:nrow(mydata)) {
# if(is.na(mydata$duration[i])) { miss.me[i] <- 1 }
# }
# str(mydata)
# mydata.good <- as.data.frame(cbind(id.orig_p = mydata$id.orig_p, id.resp_p = mydata$id.resp_p,
# orig_pkts = mydata$orig_pkts, orig_ip_bytes = mydata$orig_ip_bytes,
# resp_pkts = mydata$resp_pkts, resp_ip_bytes = mydata$resp_ip_bytes))
# mydata.good<- cbind(mydata.good, miss.me)
# head(mydata.good)
# str(mydata.good) # Should be only ints and nums
#
# for(i in 1:ncol(mydata.good)) { mydata.good[,i] <- as.numeric(mydata.good[,i]) }
# str(mydata.good) ## All should be nums now
# # sum(mydata.good$miss.me)/nrow(mydata.good) ## 82.7% missing
The data cleansing Alex performed wasn’t very conducive to allowing me to impute data so I will use the basis of his but make some small changes.
# Collect the port, packet-count and IP-byte columns into their own data
# frame; these are the numeric inputs for the clustering further down.
mydata.good <- as.data.frame(cbind(
  id.orig_p = mydata[["id.orig_p"]],
  id.resp_p = mydata[["id.resp_p"]],
  orig_pkts = mydata[["orig_pkts"]],
  orig_ip_bytes = mydata[["orig_ip_bytes"]],
  resp_pkts = mydata[["resp_pkts"]],
  resp_ip_bytes = mydata[["resp_ip_bytes"]]
))
mydata.good
I don’t want to drop any data that may be important, so I’ll also use the protocol, connection state and history features in my analysis.
# Turn the three categorical fields into their integer factor codes
# (unclass() exposes the codes of the factor).
proto <- unclass(as.factor(c(mydata[["proto"]])))
conn_state <- unclass(as.factor(c(mydata[["conn_state"]])))
history <- unclass(as.factor(c(mydata[["history"]])))

# Append the coded fields to the feature frame.
mydata.good[["proto"]] <- proto
mydata.good[["conn_state"]] <- conn_state
mydata.good[["history"]] <- history

# Coerce every column to plain numeric (this also strips the factor-level
# attributes carried over by unclass()).
for (col_idx in seq_along(mydata.good)) {
  mydata.good[[col_idx]] <- as.numeric(mydata.good[[col_idx]])
}
mydata.good
# Keep the three partially observed columns in a separate data frame,
# row-aligned with `mydata.good`; these are the imputation targets.
data_missing <- as.data.frame(cbind(
  duration = mydata[["duration"]],
  orig_bytes = mydata[["orig_bytes"]],
  resp_bytes = mydata[["resp_bytes"]]
))
data_missing
The commented-out code below is Alex’s method for the train/test split (described there as 10-fold CV, although it is a single 90%/10% hold-out). Since we randomly sampled the initial data set, taking the top 90% of the data frame we now have is still taking a random subset, so randomising the rows pulled for the training/testing data sets won’t change the effect. Splitting by position like this makes the later mean imputation much simpler.
# ## We'll do 10-fold CV and then apply DBSCAN, training on 90%
# dg <- mydata.good
# ran <- sample(1:nrow(dg), 0.9 * nrow(dg))
# nor <-function(x) { (x -min(x))/(max(x)-min(x)) }
# dg_norm <- as.data.frame(lapply(dg, nor))
# # head(dg_norm)
#
# dg_train <- dg_norm[ran,] ## extract training set
# dg_test <- dg_norm[-ran,] ## extract testing set
# dg_target_cat <- dg[ran, ncol(dg)]
# dg_test_cat <- dg[-ran, ncol(dg)]
# Positional 90/10 split of the feature frame and of the row-aligned frame
# holding the partially observed columns.
#
# The test rows are defined as "everything after the training rows". The
# previous tail(1:n, 0.1 * n) passed a fractional count, which is rounded
# up, so the last training row was also the first test row.
n_rows <- nrow(mydata.good)
n_train <- round(0.9 * n_rows)
train_idx <- seq_len(n_train)
test_idx <- setdiff(seq_len(n_rows), train_idx)

dg_train <- mydata.good[train_idx, ]
dg_test <- mydata.good[test_idx, ]

dg_train_missing <- data_missing[train_idx, ]
dg_test_missing <- data_missing[test_idx, ]

# Min-max scale a numeric vector to [0, 1].
# A constant vector has zero range; it maps to all zeros instead of NaN
# (NaN columns would make svd() fail). Vectors containing NA still come
# back as all NA.
nor <- function(x) {
  rng <- range(x)
  if (isTRUE(rng[2] == rng[1])) {
    return(rep(0, length(x)))
  }
  (x - rng[1]) / (rng[2] - rng[1])
}

# NOTE(review): each split is scaled with its own min/max, so the test set
# is not on the training scale - confirm whether that is intended.
dg_train <- as.data.frame(lapply(dg_train, nor))
dg_test <- as.data.frame(lapply(dg_test, nor))
SVD
Now we can look at running DBSCAN on our data. We first need to perform PCA (via a singular value decomposition) to figure out how many principal components to use in DBSCAN.
# Decompose the scaled training features, then draw the singular values
# twice: first on a log y-axis, then on a linear one.
dg_train.svd <- svd(dg_train)
for (log_axes in c("y", "")) {
  plot(dg_train.svd$d,
    xlab = "Eigenvalue index",
    ylab = "Eigenvalue",
    log = log_axes
  )
}

Plotting with the different axis gives a striking difference. I’ll follow the similar path of using the log axis and thus using 5 principal components since this is where the elbow occurs.
# Number of leading components kept for the clustering steps below.
npcs <- 5
We now plot the PCA to visualise the clusters formed here. We’re not plotting according to any categorical data i.e. normal vs non-normal so we may not get that much information from this.
# Scatter of the training rows on two left singular vectors (columns i and
# j of u), drawn with a nearly transparent grey so dense regions stand out.
i <- 1
j <- 2
plot(
  x = dg_train.svd$u[, i],
  y = dg_train.svd$u[, j],
  type = "p",
  col = "#33333311",
  pch = 16,
  cex = 1
)

As a reflection, all the code in this document was initially run on the same data but with the miss.me column from Alex’s code above, which creates a drastic difference in the output of svd. It results in us needing an extra principal component and removes the parallelograms from the plot above - therefore I would assume that ‘missingness’ has an effect on the clusters, i.e. whether a data point has missing values is related to which cluster it is placed into. Since we are trying to impute the missing data, I’m going to use complete case analysis and perform clustering without reference to any missingness.
Finding Parameters for DBSCAN
Eps specifies how close the points should be to each other to form a cluster. If the distance is less than eps, they are considered neighbours. We find this number by finding the ‘knee’ in the plot below. I have chosen to use 10 (dim+1) neighbours here.
# Distances from every training point to its neighbours (k = 10) in the
# retained components; the per-row minimum is plotted, sorted, on a log
# y-axis with reference lines to help pick eps.
test <- kNNdist(dg_train.svd$u[, seq_len(npcs)], k = 10, all = TRUE)
testmin <- apply(test, 1, min)
threshholds <- c(0.01, 0.001, 0.0001, 0.00001, 0.000001)

# Distances of (numerically) zero cannot go on a log axis, so drop them.
plot(sort(testmin[testmin > 1e-8]), log = "y")
abline(h = threshholds)
abline(h = 0.0001, col = "red")

So we choose h=0.0001 as our limit since this allows us to capture most of the information here. We also need to define our minimum number of points to form a cluster. The recommendation is to use minPts = 2*dim for large data sets to ensure we find significant clusters but we’ll look at a range to see what outputs we could get. As a reference, Alex is using 15 clusters so we’ll aim to reduce our data set down to that many but this is dependent on how that clustering looks and performs for mean imputation.
DBSCAN
Now we finally perform DBSCAN.
# Sweep minPts at a fixed eps and record how many distinct labels DBSCAN
# returns for each setting. The count includes the noise label 0 whenever
# noise points are present.
#
# The result is a vector with one entry per candidate, named by its minPts
# value. The previous loop wrote to clustercounts[val], i.e. used the
# minPts value itself as the position, leaving a 400-long vector that was
# almost entirely NA.
minPts <- c(20, 25, 30, 35, 40, 45, 50, 75, 100, 125, 150, 175, 200, 225, 250, 300, 400)
clustercounts <- vapply(
  minPts,
  function(val) {
    dbscanres <- dbscan(dg_train.svd$u[, seq_len(npcs)], eps = 0.0001, minPts = val)
    length(unique(dbscanres$cluster))
  },
  integer(1)
)
names(clustercounts) <- minPts
clustercounts
[1] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 180 NA
[22] NA NA NA 72 NA NA NA NA 75 NA NA NA NA 93 NA NA NA NA 110 NA NA
[43] NA NA 100 NA NA NA NA 99 NA NA NA NA NA NA NA NA NA NA NA NA NA
[64] NA NA NA NA NA NA NA NA NA NA NA 72 NA NA NA NA NA NA NA NA NA
[85] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 47 NA NA NA NA NA
[106] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 38 NA
[127] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
[148] NA NA 39 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
[169] NA NA NA NA NA NA 32 NA NA NA NA NA NA NA NA NA NA NA NA NA NA
[190] NA NA NA NA NA NA NA NA NA NA 17 NA NA NA NA NA NA NA NA NA NA
[211] NA NA NA NA NA NA NA NA NA NA NA NA NA NA 15 NA NA NA NA NA NA
[232] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 20 NA NA
[253] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
[274] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
[295] NA NA NA NA NA 24 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
[316] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
[337] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
[358] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
[379] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
[400] 13
The amount of clusters we obtain stabilizes somewhere around 200 min points since we get inflections around this point. We’ll visualise them all to see what they look like and give a comparison. To create similarity between this and Alex’s clustering I may use 200 min Points but we’ll reflect on this after the visualisations.
# Fit DBSCAN on the retained components for the minPts values that are
# visualised below; eps is the same for every run.
dbscan30 <- dbscan(dg_train.svd$u[, seq_len(npcs)], eps = 0.0001, minPts = 30)
dbscan50 <- dbscan(dg_train.svd$u[, seq_len(npcs)], eps = 0.0001, minPts = 50)
dbscan175 <- dbscan(dg_train.svd$u[, seq_len(npcs)], eps = 0.0001, minPts = 175)
dbscan200 <- dbscan(dg_train.svd$u[, seq_len(npcs)], eps = 0.0001, minPts = 200)
dbscan400 <- dbscan(dg_train.svd$u[, seq_len(npcs)], eps = 0.0001, minPts = 400)
# trying to calculate the silhouette score of this clustering to see if its valid or not - currently reports Error: Vector memory exhausted (limit reached?) - I've tried looking into work arounds but cant get anything working so I'll leave this for now.
#ss <- silhouette(dbscan200$cluster, dist(dg_train.svd$u))
Plotting resulting clusters
# Write a 2 x 5 grid of scatter plots, one per pair of the first five left
# singular vectors, coloured by the minPts = 400 labels (label 0 is grey).
png(file = "DBSCAN400 plots.png")
op <- par(mfrow = c(2, 5))
pc_pairs <- combn(5, 2) # columns run (1,2), (1,3), ..., (4,5)
for (pair_idx in seq_len(ncol(pc_pairs))) {
  k <- pc_pairs[1, pair_idx]
  l <- pc_pairs[2, pair_idx]
  plot(dg_train.svd$u[, k],
    dg_train.svd$u[, l],
    xlab = "",
    ylab = "",
    col = c("#66666666", rainbow(41))[dbscan400$cluster + 1],
    pch = 19,
    cex = 0.5
  )
}
par(op)
dev.off()
null device
1
# Write a 2 x 5 grid of scatter plots, one per pair of the first five left
# singular vectors, coloured by the minPts = 200 labels (label 0 is grey).
png(file = "DBSCAN200 plots.png")
op <- par(mfrow = c(2, 5))
pc_pairs <- combn(5, 2) # columns run (1,2), (1,3), ..., (4,5)
for (pair_idx in seq_len(ncol(pc_pairs))) {
  k <- pc_pairs[1, pair_idx]
  l <- pc_pairs[2, pair_idx]
  plot(dg_train.svd$u[, k],
    dg_train.svd$u[, l],
    xlab = "",
    ylab = "",
    col = c("#66666666", rainbow(41))[dbscan200$cluster + 1],
    pch = 19,
    cex = 0.5
  )
}
par(op)
dev.off()
null device
1
# Write a 2 x 5 grid of scatter plots, one per pair of the first five left
# singular vectors, coloured by the minPts = 175 labels (label 0 is grey).
png(file = "DBSCAN175 plots.png")
op <- par(mfrow = c(2, 5))
pc_pairs <- combn(5, 2) # columns run (1,2), (1,3), ..., (4,5)
for (pair_idx in seq_len(ncol(pc_pairs))) {
  k <- pc_pairs[1, pair_idx]
  l <- pc_pairs[2, pair_idx]
  plot(dg_train.svd$u[, k],
    dg_train.svd$u[, l],
    xlab = "",
    ylab = "",
    col = c("#66666666", rainbow(41))[dbscan175$cluster + 1],
    pch = 19,
    cex = 0.5
  )
}
par(op)
dev.off()
null device
1
# Write a 2 x 5 grid of scatter plots, one per pair of the first five left
# singular vectors, coloured by the minPts = 50 labels (label 0 is grey).
#
# The palette is sized from the clustering itself. With a fixed
# rainbow(41), any label above 41 indexes past the end of the palette,
# gets an NA colour and is silently not drawn.
png(file = "DBSCAN50 plots.png")
op <- par(mfrow = c(2, 5))
cluster_cols <- c("#66666666", rainbow(max(dbscan50$cluster)))
for (k in 1:4) {
  for (l in (k + 1):5) {
    plot(dg_train.svd$u[, k],
      dg_train.svd$u[, l],
      xlab = "",
      ylab = "",
      col = cluster_cols[dbscan50$cluster + 1],
      pch = 19,
      cex = 0.5
    )
  }
}
par(op)
dev.off()
null device
1
# Write a 2 x 5 grid of scatter plots, one per pair of the first five left
# singular vectors, coloured by the minPts = 30 labels (label 0 is grey).
#
# The palette is sized from the clustering itself. With a fixed
# rainbow(41), any label above 41 indexes past the end of the palette,
# gets an NA colour and is silently not drawn.
png(file = "DBSCAN30 plots.png")
op <- par(mfrow = c(2, 5))
cluster_cols <- c("#66666666", rainbow(max(dbscan30$cluster)))
for (k in 1:4) {
  for (l in (k + 1):5) {
    plot(dg_train.svd$u[, k],
      dg_train.svd$u[, l],
      xlab = "",
      ylab = "",
      col = cluster_cols[dbscan30$cluster + 1],
      pch = 19,
      cex = 0.5
    )
  }
}
par(op)
dev.off()
null device
1
Let’s compare the first plot for each of the five clusterings we performed.
# Draw the first-vs-second component scatter once per DBSCAN run so the
# effect of minPts can be compared side by side.
#
# The count in each title is computed from the fitted labels (distinct
# labels, which includes the noise label 0 when noise is present) instead
# of being typed in by hand; the hand-typed values for minPts = 30 and 50
# did not match the counts produced by the minPts sweep. The palette is
# sized per run, because a fixed rainbow(41) leaves labels above 41 with
# an NA colour, i.e. undrawn.
dbscan_runs <- list(
  "30" = dbscan30,
  "50" = dbscan50,
  "175" = dbscan175,
  "200" = dbscan200,
  "400" = dbscan400
)
for (min_pts in names(dbscan_runs)) {
  cluster_ids <- dbscan_runs[[min_pts]]$cluster
  cluster_cols <- c("#66666666", rainbow(max(cluster_ids)))
  plot(dg_train.svd$u[, 1],
    dg_train.svd$u[, 2],
    xlab = "",
    ylab = "",
    main = paste0(
      "minPts = ", min_pts,
      ", Clusters = ", length(unique(cluster_ids))
    ),
    col = cluster_cols[cluster_ids + 1],
    pch = 19,
    cex = 0.5
  )
}

Thus when clustering using larger minPts, the majority of points end up with label 0, i.e. DBSCAN’s noise label (the grey block in the figures). We get a merging of clusters between 30 min points and 200 min points. When performing mean imputation, we can thus either work with a large number of clusters, i.e. when the minPts is small (~30), or with fewer clusters but the majority of points under a single label, i.e. when the minPts is large (~175).
Imputation
We’ll use the clustering with 200 min points. This allows us to keep close to the way that Alex has done it with 15 clusters and ensures that we’re likely enough to have data in each cluster to allow us to impute missingness.
# Print the summary of the minPts = 200 clustering.
dbscan200
DBSCAN clustering for 204249 objects.
Parameters: eps = 1e-04, minPts = 200
The clustering contains 16 cluster(s) and 123313 noise points.
0 1 2 3 4 5 6 7 8 9 10 11
123313 48603 12257 6099 2422 858 6505 824 1582 309 211 310
12 13 14 15 16
200 254 157 145 200
Available fields: cluster, eps, minPts
# Copy the scaled training features and tag every row with its
# minPts = 200 DBSCAN label.
dg_train.clustered <- data.frame(dg_train)
dg_train.clustered[["cluster"]] <- dbscan200$cluster
dg_train.clustered

# Do the same for the row-aligned frame of partially observed columns.
dg_train_missing.clustered <- data.frame(dg_train_missing)
dg_train_missing.clustered[["cluster"]] <- dbscan200$cluster
dg_train_missing.clustered
We need to check whether we can perform imputation. If all the values in a cluster are NA then we won’t be able to perform the imputation for that cluster, and therefore may need to consider changing the clustering.
# Report every cluster label whose `duration` values are all NA; such a
# cluster has no observed value to take a mean from.
#
# The labels are read from the data instead of a hard-coded 0:16, and the
# test is all(is.na(...)) rather than a computed ratio compared with
# `== 1`, which turns into NaN (and makes `if` fail) for a label with no
# rows.
for (i in sort(unique(dg_train_missing.clustered$cluster))) {
  in_cluster <- dg_train_missing.clustered$cluster == i
  if (all(is.na(dg_train_missing.clustered$duration[in_cluster]))) {
    print(paste0("Cluster ", i, " has no non na value(s)"))
  }
}
[1] "Cluster 4 has no non na value(s)"
We see here that all but one cluster have values that allow us to impute. Cluster 4 has all NA values and thus we can’t use mean imputation to figure out what these values should be. We’ll consider other ways of imputing solely for this cluster after we’ve imputed for the other clusters. Note that none of the other tested clusterings result in better options. All the other clusterings result in more clusters with no values, e.g. dbscan400 has 2 clusters with full missingness and dbscan30 has 22 clusters with full missingness.
# Create one variable per label (cluster0 ... cluster16), each holding that
# label's rows of the partially observed frame.
for (i in 0:16) {
  in_cluster <- dg_train_missing.clustered$cluster == i
  assign(paste0("cluster", i), dg_train_missing.clustered[in_cluster, ])
}

# Concatenating data frames with c() flattens them into one plain list of
# columns: four consecutive entries (duration, orig_bytes, resp_bytes,
# cluster) per label, in label order.
clusters <- do.call(c, lapply(0:16, function(id) {
  dg_train_missing.clustered[dg_train_missing.clustered$cluster == id, ]
}))
We’ll plot the first cluster in a box plot to visualise outliers and also as a comparison for later.
# Reshape the cluster-0 rows to long format (one row per variable/value
# pair) for plotting.
meltData <- reshape::melt(data = cluster0)
Using as id variables
# Base plot of the melted cluster-0 values. The title is added to the plot
# object here; calling ggtitle() as a separate statement only creates a
# label object and leaves `p` untitled.
p <- ggplot(meltData, aes(factor(variable), value)) +
  ggtitle("Cluster: 0")
$title
[1] "Cluster: 0"
attr(,"class")
[1] "labels"
# One box plot per variable, each panel with its own axis ranges. The
# argument is spelled out as `scales`; `scale` only worked through partial
# argument matching.
p + geom_boxplot() + facet_wrap(~variable, scales = "free")

# Mean-impute the three partially observed columns cluster by cluster.
#
# `clusters` is a flat list holding four consecutive columns per cluster
# (the three partially observed columns followed by the label), so cluster
# number i - 1 occupies entries 4i - 3 .. 4i. For every cluster this
#   * records the column means of the observed values,
#   * replaces each NA with the mean of its column, and
#   * stores the completed frame as cluster<i - 1>.
# A cluster with no observed values has NaN means and is filled with NaN.
#
# The fill is one vectorised assignment per column; the earlier version
# assigned data-frame cells one row at a time and grew the mean vectors
# with c(), and its 1:nrow(a) loop misbehaves for a cluster with no rows.
n_clusters <- length(clusters) %/% 4
dmeans <- numeric(n_clusters)
obmeans <- numeric(n_clusters)
rbmeans <- numeric(n_clusters)

for (i in seq_len(n_clusters)) {
  a <- as.data.frame(clusters[(4 * i - 3):(4 * i)])
  m <- colMeans(a, na.rm = TRUE)
  print(paste0("Currently working on cluster ", i - 1, "."))

  dmeans[i] <- m[[1]]
  obmeans[i] <- m[[2]]
  rbmeans[i] <- m[[3]]

  # Only the first three columns are imputed; the fourth is the label.
  for (k in 1:3) {
    a[[k]][is.na(a[[k]])] <- m[[k]]
  }
  assign(paste0("cluster", i - 1), a)
}
[1] "Currently working on cluster 0."
[1] "Currently working on cluster 1."
[1] "Currently working on cluster 2."
[1] "Currently working on cluster 3."
[1] "Currently working on cluster 4."
[1] "Currently working on cluster 5."
[1] "Currently working on cluster 6."
[1] "Currently working on cluster 7."
[1] "Currently working on cluster 8."
[1] "Currently working on cluster 9."
[1] "Currently working on cluster 10."
[1] "Currently working on cluster 11."
[1] "Currently working on cluster 12."
[1] "Currently working on cluster 13."
[1] "Currently working on cluster 14."
[1] "Currently working on cluster 15."
[1] "Currently working on cluster 16."
We’ll finally get the table of means that we wanted. This gives us the mean of each missing column and the cluster they’re from.
# Table of per-cluster means for the three imputed columns, also rendered
# as a table into "means.pdf".
means <- data.frame(
  "cluster" = 0:16,
  "duration means" = dmeans,
  "origin_bytes means" = obmeans,
  "resp_bytes means " = rbmeans
)
pdf("means.pdf", height = 11, width = 10)
grid.table(means)
dev.off()
null device
1
# Print the table of per-cluster means.
means
Finally, we’ll test to see how this imputation has worked. We’ll look at the error, i.e. the difference between the means produced from the clustered training data and the means of the test data, which we’ll cluster now. We’ll use the same parameters as defined above to maintain consistency - if we were to re-derive these parameters on the test data, we should see similar ones since both sets are random samples of the data.
# Decompose the scaled test features and plot the rows on two left
# singular vectors (columns i and j of u).
dg_test.svd <- svd(dg_test)
i <- 1
j <- 2
plot(
  x = dg_test.svd$u[, i],
  y = dg_test.svd$u[, j],
  type = "p",
  col = "#33333311",
  pch = 16,
  cex = 1
)

# Cluster the test rows in their own component space, reusing the eps and
# minPts chosen on the training data, and print the summary.
dbscan200Test <- dbscan(
  dg_test.svd$u[, seq_len(npcs)],
  eps = 0.0001,
  minPts = 200
)
dbscan200Test
DBSCAN clustering for 22695 objects.
Parameters: eps = 1e-04, minPts = 200
The clustering contains 0 cluster(s) and 22695 noise points.
0
22695
Available fields: cluster, eps, minPts
So what we find is that the clustering for the test split finds no clusters at all: all 22695 data points are labelled 0, i.e. noise. We’ll have a look at what result this gives, but this ultimately looks like it won’t result in any fruitful comparison to see how well DBSCAN performed.
# Tag the scaled test features with their DBSCAN labels.
dg_test.clustered <- data.frame(dg_test)
dg_test.clustered[["cluster"]] <- dbscan200Test$cluster
dg_test.clustered

# Tag the partially observed test columns the same way.
dg_test_missing.clustered <- data.frame(dg_test_missing)
dg_test_missing.clustered[["cluster"]] <- dbscan200Test$cluster
dg_test_missing.clustered

# Column means over the observed (non-NA) test values, one row per column.
cluster0testmeans <- as.data.frame(colMeans(dg_test_missing.clustered, na.rm = TRUE))
cluster0testmeans
NA
# Compare the first row of `means` with the column means of the test
# frame, expressed as the relative difference 1 - test/train.
cluster0trainmeans <- means[1,]
diffmeans = c()
for(i in 2:4){
# column i of the one-row training frame; `[i]` keeps it a data frame
trainm <- cluster0trainmeans[i]
# row i-1 of the test-means frame, so i = 2..4 lines up with rows 1..3
testm <- cluster0testmeans[i-1,]
diff <- 1 - (testm/trainm)
# appended with c(), so one entry accumulates per compared column
diffmeans <- c(diffmeans, diff)
}
as.data.frame(diffmeans)
Thus we have a very large difference in the means of our training data and the means of test data and thus we may assume that DBSCAN in this case doesn’t perform very well.
Finally, we’re going to visualise the data using t-SNE projection. The plots above help us understand the data but are hard to infer anything from. We’ll visualise the DBSCAN200 data below.
# t-SNE embedding of the scaled training features.
#
# The input is the feature frame only. Passing dg_train.clustered also fed
# the DBSCAN label in as a coordinate, so the embedding was partly a
# picture of the very labels it is then coloured by.
# pca = FALSE: the input is used as given, with no PCA step inside Rtsne.
# check_duplicates = FALSE: skip the check that rejects duplicate rows.
rtsne_out <- Rtsne(
  as.matrix(dg_train),
  pca = FALSE,
  verbose = TRUE,
  check_duplicates = FALSE
)
Read the 204249 x 10 data matrix successfully!
Using no_dims = 2, perplexity = 30.000000, and theta = 0.500000
Computing input similarities...
Building tree...
- point 10000 of 204249
- point 20000 of 204249
- point 30000 of 204249
- point 40000 of 204249
- point 50000 of 204249
- point 60000 of 204249
- point 70000 of 204249
- point 80000 of 204249
- point 90000 of 204249
- point 100000 of 204249
- point 110000 of 204249
- point 120000 of 204249
- point 130000 of 204249
- point 140000 of 204249
- point 150000 of 204249
- point 160000 of 204249
- point 170000 of 204249
- point 180000 of 204249
- point 190000 of 204249
- point 200000 of 204249
Done in 54.31 seconds (sparsity = 0.000507)!
Learning embedding...
Iteration 50: error is 135.077337 (50 iterations in 123.21 seconds)
Iteration 100: error is 135.077337 (50 iterations in 135.97 seconds)
Iteration 150: error is 135.077334 (50 iterations in 134.19 seconds)
Iteration 200: error is 135.076766 (50 iterations in 132.70 seconds)
Iteration 250: error is 134.538155 (50 iterations in 141.13 seconds)
Iteration 300: error is 7.064268 (50 iterations in 240.17 seconds)
Iteration 350: error is 6.428282 (50 iterations in 202.87 seconds)
Iteration 400: error is 6.053465 (50 iterations in 125.76 seconds)
Iteration 450: error is 5.775746 (50 iterations in 125.81 seconds)
Iteration 500: error is 5.549144 (50 iterations in 134.40 seconds)
Iteration 550: error is 5.354604 (50 iterations in 138.90 seconds)
Iteration 600: error is 5.182348 (50 iterations in 127.15 seconds)
Iteration 650: error is 5.027076 (50 iterations in 125.86 seconds)
Iteration 700: error is 4.885609 (50 iterations in 127.00 seconds)
Iteration 750: error is 4.755611 (50 iterations in 127.65 seconds)
Iteration 800: error is 4.635803 (50 iterations in 146.67 seconds)
Iteration 850: error is 4.524777 (50 iterations in 102.73 seconds)
Iteration 900: error is 4.421635 (50 iterations in 88.94 seconds)
Iteration 950: error is 4.325102 (50 iterations in 88.47 seconds)
Iteration 1000: error is 4.234204 (50 iterations in 88.23 seconds)
Fitting performed in 2657.83 seconds.
# 2-D t-SNE coordinates, one dot per training row, coloured by the
# minPts = 200 DBSCAN label (label 0 is grey).
plot(
  rtsne_out$Y,
  asp = 1,
  pch = 20,
  cex = 0.1,
  cex.axis = 1.25,
  cex.lab = 1.25,
  cex.main = 1.5,
  xlab = "t-SNE dimension 1",
  ylab = "t-SNE dimension 2",
  main = "2D t-SNE projection",
  col = c("#66666666", rainbow(41))[dbscan200$cluster + 1]
)

We’ll also look at a plot using umap.
# UMAP embedding of the scaled training features, coloured by the
# minPts = 200 DBSCAN label (label 0 is grey).
#
# The input is the feature frame only. Passing dg_train.clustered also fed
# the DBSCAN label in as a coordinate, which by itself pulls points with
# the same label together and makes the clusters look better separated
# than the features justify.
# The uwot implementation is named explicitly: both `umap` and `uwot` are
# attached and each exports umap().
data.umap <- uwot::umap(dg_train, init = "spectral")
plot(data.umap,
  asp = 1, pch = 20,
  cex = 0.2, cex.axis = 1.25, cex.lab = 1.25, cex.main = 1.5,
  main = "2D umap projection",
  col = c("#66666666", rainbow(41))[dbscan200$cluster + 1]
)

The difference is startling. Whereas the tsne plot looks fairly jumbled with clusters, with no clusters actually seeming to appear and more scattering within it, the umap plot has very discrete clusters and gives a much better visualisation. We get some scattering between clusters with grey/red points occasionally showing up where we don’t necessarily expect them but overall the clusters look very independent. With this in mind, I would presume that the clustering with a minimum points of 200 does produce valid clusters and is a good way to perform imputation based on clusters, despite some of the earlier issues that may still be valid. Additionally, the umap projection is incredibly fast compared to the tsne projection and therefore is computationally more useful.
LS0tCnRpdGxlOiAiQXNzZXNzbWVudCAyIC0gTWF0dCIKb3V0cHV0OgogIGh0bWxfZG9jdW1lbnQ6CiAgICBkZl9wcmludDogcGFnZWQKICBodG1sX25vdGVib29rOiBkZWZhdWx0Ci0tLQojIE92ZXJ2aWV3CkluIHRoaXMgYXNzZXNzbWVudCB3ZSBhaW0gdG8gdXNlIHRoZSBNQUNDREMgY29ubiBkYXRhIHRvIHBlcmZvcm0gZGF0YSBhbmFseXNpcyBhbmQgbW9kZWxsaW5nLgpGaXJzdCB3ZSdsbCBpbXBvcnQgYW55IGxpYnJhcmllcyB3ZSBpbnRlbmQgdG8gdXNlLgoKYGBge3J9CiNpbnN0YWxsLnBhY2thZ2VzKCJkYnNjYW4iKQojaW5zdGFsbC5wYWNrYWdlcygiY2x1c3RlciIpCiNpbnN0YWxsLnBhY2thZ2VzKCJyZXNoYXBlIikKI2luc3RhbGwucGFja2FnZXMoImdncGxvdDIiKQojaW5zdGFsbC5wYWNrYWdlcygiZ3JpZEV4dHJhIikKI2luc3RhbGwucGFja2FnZXMoIk1hdHJpeCIpCiNpbnN0YWxsLnBhY2thZ2VzKCJpcmxiYSIpCiNpbnN0YWxsLnBhY2thZ2VzKCJSdHNuZSIpCiNpbnN0YWxsLnBhY2thZ2VzKCJ1bWFwIikKI2luc3RhbGwucGFja2FnZXMoInV3b3QiKQpsaWJyYXJ5KGRic2NhbikKbGlicmFyeShjbHVzdGVyKQpsaWJyYXJ5KHJlc2hhcGUpCmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeShncmlkRXh0cmEpCmxpYnJhcnkoTWF0cml4KQpsaWJyYXJ5KGlybGJhKQpsaWJyYXJ5KFJ0c25lKQpsaWJyYXJ5KHVtYXApCmxpYnJhcnkodXdvdCkKYGBgCgpXZSBmaXJzdCBtdXN0IGltcG9ydCB0aGUgZGF0YS4KCmBgYHtyfQpteWRhdGEgPC0gcmVhZC5jc3YoIk1BQy5jc3YiKQpteWRhdGEgPC0gZGF0YS5mcmFtZShteWRhdGEpCmBgYAoKYGBge3J9Cm15ZGF0YQpgYGAKV2UgZmlyc3Qgd2FudCB0byBsb29rIGZvciBtaXNzaW5nIGRhdGEuIFNlcnZpY2UsIGR1cmF0aW9uLCBvcmlnX2J5dGVzLCByZXNwX2J5dGVzIGFuZCBsb2NhbF9vcmlnIGFsbCBzZWVtIHRvIGhhdmUgbWlzc2luZyBkYXRhIGluIHRoZW0gc28gd2Ugd2lsbCBzZWUgd2hhdCBwZXJjZW50YWdlLgoKYGBge3J9Cm10YWIwPWRhdGEuZnJhbWUoCiAgICBtaXNzaW5nZHVyYXRpb249aXMubmEobXlkYXRhWywiZHVyYXRpb24iXSksCiAgICBwcm90bz1teWRhdGFbLCJwcm90byJdKQptdGFiMD10YWJsZShtdGFiMCkKKGFwcGx5KG10YWIwLDIsZnVuY3Rpb24oeCl4L3N1bSh4KSkpCgptdGFiMT1kYXRhLmZyYW1lKAogICAgbWlzc2luZ19vcmlnX2J5dGVzPWlzLm5hKG15ZGF0YVssIm9yaWdfYnl0ZXMiXSksCiAgICBwcm90bz1teWRhdGFbLCJwcm90byJdKQptdGFiMT10YWJsZShtdGFiMSkKKGFwcGx5KG10YWIxLDIsZnVuY3Rpb24oeCl4L3N1bSh4KSkpCgptdGFiMj1kYXRhLmZyYW1lKAogICAgbWlzc2luZ19yZXNwX2J5dGVzPWlzLm5hKG15ZGF0YVssInJlc3BfYnl0ZXMiXSksCiAgICBwcm90bz1teWRhdGFbLCJwcm90byJdKQptdGFiMj10YWJsZShtdGFiMikKKGFwcGx5KG10YWIyLDIsZnVuY3Rpb24oeCl4L3N1bSh4KSkpCgptdGFiMz1kYXRhLmZyYW1lKAogICAgbWlz
c2luZ19sb2NhbF9vcmlnPWlzLm5hKG15ZGF0YVssImxvY2FsX29yaWciXSksCiAgICBwcm90bz1teWRhdGFbLCJwcm90byJdKQptdGFiMz10YWJsZShtdGFiMykKKGFwcGx5KG10YWIzLDIsZnVuY3Rpb24oeCl4L3N1bSh4KSkpCmBgYApUaHVzIHdlIGFyZSBtaXNzaW5nIHRoZSBsb2NhbF9vcmlnIGZlYXR1cmUgZm9yIGV2ZXJ5IGRhdGEgcG9pbnQgaW4gdGhlIGRhdGEgc2V0LiBXZSBtYXkgdGhlbiBjb25zaWRlciBkcm9wcGluZyB0aGlzIGVudGlyZSBjb2x1bW4gYXMgaXQgc2VydmVzIG5vIHVzZSB0byB1cyBhbmQgd2UgY2Fubm90IGltcHV0ZSB0aGUgZGF0YSB3aXRob3V0IHByaW9yIGtub3dsZWRnZSBvZiB0aGUgZGF0YSBzZXQgYW5kIHdoYXQgaXQgc2hvdWxkIGxvb2sgbGlrZS4gVGhlIGR1cmF0aW9uLCBvcmlnX2J5dGVzIGFuZCByZXNwX2J5dGVzIGFsbCBhcHBlYXIgdG8gYmUgbWlzc2luZyBleGFjdGx5IHRoZSBzYW1lIGRhdGEgLSBvbiBmdXJ0aGVyIGFuYWx5c2lzLCB3ZSBzZWUgdGhhdCB3aGVuZXZlciBvbmUgaXMgbWlzc2luZywgYWxsIHRocmVlIGFyZSBtaXNzaW5nLiAKClNvbWUgaW5pdGlhbCBkYXRhIGNsZWFuc2luZyB3aWxsIGNvbWUgZnJvbSByZW1vdmluZyB0aGUgWCBjb2x1bW4gYW5kIHRoZSB0cyBjb2x1bW4uIFRoZSBYIGNvbHVtbiBpcyBwcm9kdWNlZCBieSB0aGUgc2FtcGxpbmcgYW5kIHNpbmNlIHdlIGhhdmUgYSByYW5kb20gc2FtcGxlIG9mIHRoZSBkYXRhLCB0aGUgdHMgcHJvdmlkZXMgbm8gcmVhbCBpbmZvcm1hdGlvbiBvbiB0aGUgZGF0YS4KCmBgYHtyfQp1bmlxdWVfdWlkIDwtIG15ZGF0YVshZHVwbGljYXRlZChteWRhdGFbLGMoJ3VpZCcpXSksXQp1bmlxdWVfdWlkCmBgYApUaHVzIGFsbCBvdXIgdWlkJ3MgYXJlIHVuaXF1ZSBhbmQgdGhlcmVmb3JlIHdvbnQgcHJvdmlkZSB1cyB3aXRoIGFueSBleHRyYSBpbmZvcm1hdGlvbiBlaXRoZXIgc2luY2UgdGhleSB3aWxsIGJlIHVuY29ycmVsYXRlZCB3aXRoIHRoZSByZXN0IG9mIHRoZSBkYXRhLiBUaGlzIGlzIHRoZSBvbmx5IGNvbHVtbiB3aXRoIHRoaXMgdHJhaXQsIGFuZCBhbGwgb3RoZXIgY29sdW1ucyBoYXZlIHZhbHVlcyB3aGljaCBvY2N1ciBtb3JlIHRoYW4gb25jZSBzbyB3ZSBjYW4gZHJvcCB0aGUgdWlkIGNvbHVtbiB0b28uCgpgYGB7cn0KZHJvcF9jb2x1bW5zIDwtIGMoIlgiLCJ0cyIsImxvY2FsX29yaWciLCJ1aWQiKQpteWRhdGEgPC0gbXlkYXRhWywgIW5hbWVzKG15ZGF0YSkgJWluJSBkcm9wX2NvbHVtbnNdCmBgYAoKYGBge3J9CmhlYWQobXlkYXRhKQpgYGAKClNvIHdlIGhhdmUgcmVtb3ZlZCB0aGUgY29sdW1ucyB0aGF0IGRpZG4ndCBwcm92aWRlIHVzIHdpdGggYW55IGV4dHJhIGluZm9ybWF0aW9uLiBXZSB3aWxsIG5vdyBleHRyYWN0IHRoZSBkYXRhIHdlIHdpbGwgdXNlIGZvciBEQlNDQU4gdG8gY3JlYXRlIGNsdXN0ZXJzLiBUaGUgZm9sbG93aW5nIGNvZGUgaXMgcHVsbGVkIGZyb20gQWxleCdzIHdvcmtib29rIGFuZCBhbGxv
d3MgdXMgdG8gcHVsbCBvdXQgNyBvZiB0aGUgZmVhdHVyZXMgdG8gdXNlIGZvciBEQlNDQU4gYW5kIGVuc3VyZXMgYWxsIGVsZW1lbnRzIGFyZSBudW1lcmljLgoKYGBge3J9CiMgbWlzcy5tZSA8LSB2ZWN0b3IobGVuZ3RoID0gbnJvdyhteWRhdGEpKQojIG1pc3MubWUgPC0gcmVwKDAsIHRpbWVzID0gbnJvdyhteWRhdGEpKQojIGZvcihpIGluIDE6bnJvdyhteWRhdGEpKSB7CiMgCWlmKGlzLm5hKG15ZGF0YSRkdXJhdGlvbltpXSkpIHsgbWlzcy5tZVtpXSA8LSAxIH0KIyAJfQojIHN0cihteWRhdGEpCiMgbXlkYXRhLmdvb2QgPC0gYXMuZGF0YS5mcmFtZShjYmluZChpZC5vcmlnX3AgPSBteWRhdGEkaWQub3JpZ19wLCBpZC5yZXNwX3AgPSBteWRhdGEkaWQucmVzcF9wLCAKIyBvcmlnX3BrdHMgPSBteWRhdGEkb3JpZ19wa3RzLCBvcmlnX2lwX2J5dGVzID0gbXlkYXRhJG9yaWdfaXBfYnl0ZXMsIAojIHJlc3BfcGt0cyA9IG15ZGF0YSRyZXNwX3BrdHMsIHJlc3BfaXBfYnl0ZXMgPSBteWRhdGEkcmVzcF9pcF9ieXRlcykpCiMgbXlkYXRhLmdvb2Q8LSBjYmluZChteWRhdGEuZ29vZCwgbWlzcy5tZSkKIyBoZWFkKG15ZGF0YS5nb29kKQojIHN0cihteWRhdGEuZ29vZCkgIyBTaG91bGQgYmUgb25seSBpbnRzIGFuZCBudW1zCiMgCiMgZm9yKGkgaW4gMTpuY29sKG15ZGF0YS5nb29kKSkgeyBteWRhdGEuZ29vZFssaV0gPC0gYXMubnVtZXJpYyhteWRhdGEuZ29vZFssaV0pIH0KIyBzdHIobXlkYXRhLmdvb2QpCQkjIyBBbGwgc2hvdWxkIGJlIG51bXMgbm93CiMgIyBzdW0obXlkYXRhLmdvb2QkbWlzcy5tZSkvbnJvdyhteWRhdGEuZ29vZCkgIyMgODIuNyUgbWlzc2luZwoKYGBgCgpUaGUgZGF0YSBjbGVhbnNpbmcgQWxleCBwZXJmb3JtZWQgd2Fzbid0IHZlcnkgY29uZHVjaXZlIHRvIGFsbG93aW5nIG1lIHRvIGltcHV0ZSBkYXRhIHNvIEkgd2lsbCB1c2UgdGhlIGJhc2lzIG9mIGhpcyBidXQgbWFrZSBzb21lIHNtYWxsIGNoYW5nZXMuCmBgYHtyfQpteWRhdGEuZ29vZCA8LSBhcy5kYXRhLmZyYW1lKGNiaW5kKGlkLm9yaWdfcCA9IG15ZGF0YSRpZC5vcmlnX3AsIGlkLnJlc3BfcCA9IG15ZGF0YSRpZC5yZXNwX3AsIG9yaWdfcGt0cyA9IG15ZGF0YSRvcmlnX3BrdHMsIG9yaWdfaXBfYnl0ZXMgPSBteWRhdGEkb3JpZ19pcF9ieXRlcyxyZXNwX3BrdHMgPSBteWRhdGEkcmVzcF9wa3RzLCByZXNwX2lwX2J5dGVzID0gbXlkYXRhJHJlc3BfaXBfYnl0ZXMpKQoKbXlkYXRhLmdvb2QKYGBgCgpJIGRvbnQgd2FudCB0byBkcm9wIGFueSBkYXRhIHRoYXQgbWF5IGJlIGltcG9ydGFudCBzbyBJJ2xsIGFsc28gdXNlIHRoZSBwcm90b2NvbCwgY29ubmVjdGlvbiBzdGF0ZSBhbmQgaGlzdG9yeSBmZWF0dXJlcyBpbiBteSBhbmFseXNpcy4KCmBgYHtyfQpwcm90byA8LSBhcy5mYWN0b3IoYyhteWRhdGEkcHJvdG8pKQpwcm90byA8LSB1bmNsYXNzKHByb3RvKQoKY29ubl9zdGF0ZSA8LSBhcy5mYWN0b3IoYyhteWRhdGEkY29ubl9zdGF0ZSkpCmNvbm5f
c3RhdGUgPC0gdW5jbGFzcyhjb25uX3N0YXRlKQoKaGlzdG9yeSA8LSBhcy5mYWN0b3IoYyhteWRhdGEkaGlzdG9yeSkpCmhpc3RvcnkgPC0gdW5jbGFzcyhoaXN0b3J5KQoKbXlkYXRhLmdvb2QkcHJvdG8gPC0gcHJvdG8KbXlkYXRhLmdvb2QkY29ubl9zdGF0ZSA8LSBjb25uX3N0YXRlCm15ZGF0YS5nb29kJGhpc3RvcnkgPC0gaGlzdG9yeQoKZm9yKGkgaW4gMTpuY29sKG15ZGF0YS5nb29kKSkgeyBteWRhdGEuZ29vZFssaV0gPC0gYXMubnVtZXJpYyhteWRhdGEuZ29vZFssaV0pIH0KCm15ZGF0YS5nb29kCmBgYAoKYGBge3J9CmRhdGFfbWlzc2luZyA8LSBhcy5kYXRhLmZyYW1lKGNiaW5kKGR1cmF0aW9uID0gbXlkYXRhJGR1cmF0aW9uLCBvcmlnX2J5dGVzID0gbXlkYXRhJG9yaWdfYnl0ZXMsIHJlc3BfYnl0ZXMgPSBteWRhdGEkcmVzcF9ieXRlcykpCgpkYXRhX21pc3NpbmcKYGBgClRoZSBiZWxvdyBjb2RlIGlzIEFsZXgncyBtZXRob2QgZm9yIDEwLWZvbGQgQ1YuIFNpbmNlIHdlIHJhbmRvbWx5IHNhbXBsZWQgdGhlIGludGlhbCBkYXRhIHNldCwgdGFraW5nIHRoZSB0b3AgOTAlIG9mIHRoZSBkYXRhIGZyYW1lIHdlIG5vdyBoYXZlIGlzIHN0aWxsIHRha2luZyBhIHJhbmRvbSBzdWJzZXQgc28gcmFuZG9taXNpbmcgdGhlIGRhdGEgcHVsbGVkIGZvciB0aGUgdHJhaW5pbmcvdGVzdGluZyBkYXRhIHNldCB3b250IGNoYW5nZSB0aGUgYWZmZWN0cy4gRG9pbmcgdGhpcyBsaWtlIHRoaXMgbWFrZXMgdGhlIGxhdHRlciBtZWFuIGltcHV0YXRpb24gbXVjaCBzaW1wbGVyLgoKYGBge3J9CiMgCSMjIFdlJ2xsIGRvIDEwLWZvbGQgQ1YgYW5kIHRoZW4gYXBwbHkgREJTQ0FOLCB0cmFpbmluZyBvbiA5MCUKIyBkZyA8LSBteWRhdGEuZ29vZAojIHJhbiA8LSBzYW1wbGUoMTpucm93KGRnKSwgMC45ICogbnJvdyhkZykpCiMgbm9yIDwtZnVuY3Rpb24oeCkgeyAoeCAtbWluKHgpKS8obWF4KHgpLW1pbih4KSkgICB9CiMgZGdfbm9ybSA8LSBhcy5kYXRhLmZyYW1lKGxhcHBseShkZywgbm9yKSkKIyAJIyBoZWFkKGRnX25vcm0pCiMgCiMgZGdfdHJhaW4gPC0gZGdfbm9ybVtyYW4sXSAJIyMgZXh0cmFjdCB0cmFpbmluZyBzZXQKIyBkZ190ZXN0IDwtIGRnX25vcm1bLXJhbixdICAgCSMjIGV4dHJhY3QgdGVzdGluZyBzZXQKIyBkZ190YXJnZXRfY2F0IDwtIGRnW3JhbiwgbmNvbChkZyldCiMgZGdfdGVzdF9jYXQgPC0gZGdbLXJhbiwgbmNvbChkZyldCmBgYAoKYGBge3J9CmRnX3RyYWluIDwtIG15ZGF0YS5nb29kWzE6cm91bmQoMC45Km5yb3cobXlkYXRhLmdvb2QpKSwgXQpkZ190ZXN0IDwtIG15ZGF0YS5nb29kW3RhaWwoMTpucm93KG15ZGF0YS5nb29kKSwgMC4xKm5yb3cobXlkYXRhLmdvb2QpKSwgXQoKZGdfdHJhaW5fbWlzc2luZyA8LSBkYXRhX21pc3NpbmdbMTpyb3VuZCgwLjkqbnJvdyhkYXRhX21pc3NpbmcpKSwgXQpkZ190ZXN0X21pc3Npbmc8LSBkYXRhX21pc3NpbmdbdGFpbCgxOm5yb3coZGF0YV9taXNzaW5nKSwgMC4xKm5yb3co
ZGF0YV9taXNzaW5nKSksIF0KCm5vciA8LWZ1bmN0aW9uKHgpeyAoeCAtbWluKHgpKS8obWF4KHgpLW1pbih4KSkgICB9CmRnX3RyYWluIDwtIGFzLmRhdGEuZnJhbWUobGFwcGx5KGRnX3RyYWluLCBub3IpKQpkZ190ZXN0IDwtIGFzLmRhdGEuZnJhbWUobGFwcGx5KGRnX3Rlc3QsIG5vcikpCmBgYAoKIyMgU1ZECgpOb3cgd2UgY2FuIGxvb2sgYXQgcnVubmluZyBEQlNDQU4gb24gb3VyIGRhdGEuIFdlIGZpcnN0IG5lZWQgdG8gcGVyZm9ybSBQQ0EgdG8gZmlndXJlIG91dCBob3cgbWFueSBwcmluY2lwbGUgY29tcG9uZW50cyB0byB1c2UgaW4gREJTQ0FOLgoKYGBge3J9CmRnX3RyYWluLnN2ZCA8LSBzdmQoZGdfdHJhaW4pCmBgYAoKYGBge3J9CnBsb3QoZGdfdHJhaW4uc3ZkJGQseGxhYj0iRWlnZW52YWx1ZSBpbmRleCIseWxhYj0iRWlnZW52YWx1ZSIsbG9nPSJ5IikKcGxvdChkZ190cmFpbi5zdmQkZCx4bGFiPSJFaWdlbnZhbHVlIGluZGV4Iix5bGFiPSJFaWdlbnZhbHVlIikKYGBgCgpQbG90dGluZyB3aXRoIHRoZSBkaWZmZXJlbnQgYXhpcyBnaXZlcyBhIHN0cmlraW5nIGRpZmZlcmVuY2UuIEknbGwgZm9sbG93IHRoZSBzaW1pbGFyIHBhdGggb2YgdXNpbmcgdGhlIGxvZyBheGlzIGFuZCB0aHVzIHVzaW5nIDUgcHJpbmNpcGFsIGNvbXBvbmVudHMgc2luY2UgdGhpcyBpcyB3aGVyZSB0aGUgZWxib3cgb2NjdXJzLgoKYGBge3J9Cm5wY3MgPSA1CmBgYAoKV2Ugbm93IHBsb3QgdGhlIFBDQSB0byB2aXN1YWxpc2UgdGhlIGNsdXN0ZXJzIGZvcm1lZCBoZXJlLiBXZSdyZSBub3QgcGxvdHRpbmcgYWNjb3JkaW5nIHRvIGFueSBjYXRlZ29yaWNhbCBkYXRhIGkuZS4gbm9ybWFsIHZzIG5vbi1ub3JtYWwgc28gd2UgbWF5IG5vdCBnZXQgdGhhdCBtdWNoIGluZm9ybWF0aW9uIGZyb20gdGhpcy4KCmBgYHtyfQppPTE7aj0yCnBsb3QoZGdfdHJhaW4uc3ZkJHVbLGldLAogICAgIGRnX3RyYWluLnN2ZCR1WyxqXSx0eXBlPSJwIiwKICAgICBjb2w9IiMzMzMzMzMxMSIscGNoPTE2LGNleD0xKQpgYGAKCkFzIGEgcmVmbGVjdGlvbiwgYWxsIHRoZSBjb2RlIGluIHRoaXMgZG9jdW1lbnQgd2FzIGluaXRpYWxseSBydW4gb24gdGhlIHNhbWUgZGF0YSBidXQgd2l0aCB0aGUgbWlzcy5tZSBjb2x1bW4gZnJvbSBBbGV4J3MgY29kZSBhYm92ZSB3aGljaCBjcmVhdGVzIGEgZHJhc3RpYyBkaWZmZXJlbmNlIGluIHRoZSBvdXRwdXQgb2Ygc3ZkLiBJdCByZXN1bHRzIGluIHVzIG5lZWRpbmcgYW4gZXh0cmEgcHJpbmNpcGxlIGNvbXBvbmVudCBhbmQgcmVtb3ZlcyB0aGUgcGFyYWxsZWxvZ3JhbXMgZnJvbSB0aGUgcGxvdCBhYm92ZSAtIHRoZXJlZm9yZSBJIHdvdWxkIGFzc3VtZSB0aGF0ICdtaXNzaW5nbmVzcycgaGFzIGEgcmVzdWx0IG9uIGNsdXN0ZXJzIGFuZCBpcyB0aGVyZWZvcmUgZGVwZW5kZW50IG9uIHdoaWNoIGNsdXN0ZXIgYSBkYXRhIHBvaW50IGlzIHBsYWNlZCBpbnRvLiBTaW5jZSB3ZSBhcmUgdHJ5aW5nIHRvIGltcHV0ZSB0aGUgbWlzc2lu
ZyBkYXRhIEknbSBnb2luZyB0byB1c2UgY29tcGxldGUgY2FzZSBhbmFseXNpcyBhbmQgcGVyZm9ybSBjbHVzdGVyaW5nIHdpdGhvdXQgcmVmZXJlbmNlIHRvIGFueSBtaXNzaW5nbmVzcy4KCiMjIEZpbmRpbmcgUGFyYW1ldGVycyBmb3IgREJTQ0FOCgpFcHMgc3BlY2lmaWVzIGhvdyBjbG9zZSB0aGUgcG9pbnRzIHNob3VsZCBiZSB0byBlYWNoIG90aGVyIHRvIGZvcm0gYSBjbHVzdGVyLiBJZiB0aGUgZGlzdGFuY2UgaXMgbGVzcyB0aGFuIGVwcywgdGhleSBhcmUgY29uc2lkZXJlZCBuZWlnaGJvdXJzLiBXZSBmaW5kIHRoaXMgbnVtYmVyIGJ5IGZpbmRpbmcgdGhlICdrbmVlJyBpbiB0aGUgcGxvdCBiZWxvdy4gSSBoYXZlIGNob3NlbiB0byB1c2UgMTAgKGRpbSsxKSBuZWlnaGJvdXJzIGhlcmUuCgpgYGB7cn0KdGVzdD1rTk5kaXN0KGRnX3RyYWluLnN2ZCR1WywxOm5wY3NdLCBrID0gMTAsIGFsbD1UUlVFKQp0ZXN0bWluPWFwcGx5KHRlc3QsMSxtaW4pCmBgYAoKYGBge3J9CnBsb3Qoc29ydCh0ZXN0bWluW3Rlc3RtaW4+MWUtOF0pLGxvZz0ieSIpCnRocmVzaGhvbGRzPSBjKDAuMDEsMC4wMDEsMC4wMDAxLDAuMDAwMDEsMC4wMDAwMDEpCmFibGluZShoPWMoMC4wMSwwLjAwMSwwLjAwMDEsMC4wMDAwMSwwLjAwMDAwMSkpCmFibGluZShoPTAuMDAwMSwgY29sPSJyZWQiKQpgYGAKClNvIHdlIGNob29zZSBoPTAuMDAwMSBhcyBvdXIgbGltaXQgc2luY2UgdGhpcyBhbGxvd3MgdXMgdG8gY2FwdHVyZSBtb3N0IG9mIHRoZSBpbmZvcm1hdGlvbiBoZXJlLiBXZSBhbHNvIG5lZWQgdG8gZGVmaW5lIG91ciBtaW5pbXVtIG51bWJlciBvZiBwb2ludHMgdG8gZm9ybSBhIGNsdXN0ZXIuIFRoZSByZWNvbW1lbmRhdGlvbiBpcyB0byB1c2UgbWluUHRzID0gMipkaW0gZm9yIGxhcmdlIGRhdGEgc2V0cyB0byBlbnN1cmUgd2UgZmluZCBzaWduaWZpY2FudCBjbHVzdGVycyBidXQgd2UnbGwgbG9vayBhdCBhIHJhbmdlIHRvIHNlZSB3aGF0IG91dHB1dHMgd2UgY291bGQgZ2V0LiBBcyBhIHJlZmVyZW5jZSwgQWxleCBpcyB1c2luZyAxNSBjbHVzdGVycyBzbyB3ZSdsbCBhaW0gdG8gcmVkdWNlIG91ciBkYXRhIHNldCBkb3duIHRvIHRoYXQgbWFueSBidXQgdGhpcyBpcyBkZXBlbmRlbnQgb24gaG93IHRoYXQgY2x1c3RlcmluZyBsb29rcyBhbmQgcGVyZm9ybXMgZm9yIG1lYW4gaW1wdXRhdGlvbi4KCiMjIERCU0NBTgoKTm93IHdlIGZpbmFsbHkgcGVyZm9ybSBEQlNDQU4uCgpgYGB7cn0KbWluUHRzID0gYygyMCwgMjUsIDMwLCAzNSwgNDAsIDQ1LCA1MCwgNzUsIDEwMCwgMTI1LCAxNTAsIDE3NSwgMjAwLCAyMjUsIDI1MCwgMzAwLCA0MDApCmNsdXN0ZXJjb3VudHMgPSBjKCkKCmZvcih2YWwgaW4gbWluUHRzKSB7CiAgZGJzY2FucmVzID0gZGJzY2FuKGRnX3RyYWluLnN2ZCR1WywxOm5wY3NdLGVwcyA9IDAuMDAwMSxtaW5QdHMgPSB2YWwpCiAgY2x1c3RlcmNvdW50c1t2YWxdIDwtIChsZW5ndGgodW5pcXVlKGRic2NhbnJlcyRjbHVzdGVyKSkpCn0K
YGBgCgpgYGB7cn0KY2x1c3RlcmNvdW50cwpgYGAKClRoZSBhbW91bnQgb2YgY2x1c3RlcnMgd2Ugb2J0YWluIHN0YWJpbGl6ZXMgc29tZXdoZXJlIGFyb3VuZCAyMDAgbWluIHBvaW50cyBzaW5jZSB3ZSBnZXQgaW5mbGVjdGlvbnMgYXJvdW5kIHRoaXMgcG9pbnQuIFdlJ2xsIHZpc3VhbGlzZSB0aGVtIGFsbCB0byBzZWUgd2hhdCB0aGV5IGxvb2sgbGlrZSBhbmQgZ2l2ZSBhIGNvbXBhcmlzb24uIFRvIGNyZWF0ZSBzaW1pbGFyaXR5IGJldHdlZW4gdGhpcyBhbmQgQWxleCdzIGNsdXN0ZXJpbmcgSSBtYXkgdXNlIDIwMCBtaW4gUG9pbnRzIGJ1dCB3ZSdsbCByZWZsZWN0IG9uIHRoaXMgYWZ0ZXIgdGhlIHZpc3VhbGlzYXRpb25zLgoKYGBge3J9CmRic2NhbjQwMCA9IGRic2NhbihkZ190cmFpbi5zdmQkdVssMTpucGNzXSxlcHM9MC4wMDAxLCBtaW5QdHMgPSA0MDApCmRic2NhbjIwMCA9IGRic2NhbihkZ190cmFpbi5zdmQkdVssMTpucGNzXSxlcHMgPSAwLjAwMDEsbWluUHRzID0gMjAwKQpkYnNjYW4xNzUgPSBkYnNjYW4oZGdfdHJhaW4uc3ZkJHVbLDE6bnBjc10sZXBzPTAuMDAwMSxtaW5QdHMgPSAxNzUpCmRic2NhbjUwID0gZGJzY2FuKGRnX3RyYWluLnN2ZCR1WywxOm5wY3NdLGVwcz0wLjAwMDEsbWluUHRzID0gNTApCmRic2NhbjMwID0gZGJzY2FuKGRnX3RyYWluLnN2ZCR1WywxOm5wY3NdLGVwcz0wLjAwMDEsIG1pblB0cyA9IDMwKQpgYGAKCmBgYHtyfQojIHRyeWluZyB0byBjYWxjdWxhdGUgdGhlIHNpbGhvdWV0dGUgc2NvcmUgb2YgdGhpcyBjbHVzdGVyaW5nIHRvIHNlZSBpZiBpdHMgdmFsaWQgb3Igbm90IC0gY3VycmVudGx5IHJlcG9ydHMgRXJyb3I6IFZlY3RvciBtZW1vcnkgZXhoYXVzdGVkIChsaW1pdCByZWFjaGVkPykgLSBJJ3ZlIHRyaWVkIGxvb2tpbmcgaW50byB3b3JrIGFyb3VuZHMgYnV0IGNhbnQgZ2V0IGFueXRoaW5nIHdvcmtpbmcgc28gSSdsbCBsZWF2ZSB0aGlzIGZvciBub3cuCiNzcyA8LSBzaWxob3VldHRlKGRic2NhbjIwMCRjbHVzdGVyLCBkaXN0KGRnX3RyYWluLnN2ZCR1KSkKYGBgCgojIyBQbG90dGluZyByZXN1bHRpbmcgY2x1c3RlcnMKCgpgYGB7cn0KcG5nKGZpbGUgPSAiREJTQ0FONDAwIHBsb3RzLnBuZyIpCm9wPC0gcGFyKG1mcm93PWMoMiw1KSkKZm9yIChrIGluIDE6NCl7CiAgICBhID0gc2VxKGsrMSw1KQogICAgZm9yIChsIGluIGEpewogICAgICAgIGlmKGs9PWwpe25leHR9CiAgICAgICAgcGxvdChkZ190cmFpbi5zdmQkdVssa10sCiAgICAgICAgICAgIGRnX3RyYWluLnN2ZCR1WyxsXSx4bGFiPSIiLAogICAgICAgICAgICB5bGFiPSIiLAogICAgICAgICAgICBjb2w9YygiIzY2NjY2NjY2IixyYWluYm93KDQxKSlbZGJzY2FuNDAwJGNsdXN0ZXIrMV0scGNoPTE5LGNleD0wLjUpCiAgICB9Cn0KcGFyKG9wKQpkZXYub2ZmKCkKYGBgCgoKYGBge3J9CnBuZyhmaWxlID0gIkRCU0NBTjIwMCBwbG90cy5wbmciKQpvcDwtIHBhcihtZnJvdz1jKDIsNSkpCmZvciAoayBpbiAxOjQpewogICAg
YSA9IHNlcShrKzEsNSkKICAgIGZvciAobCBpbiBhKXsKICAgICAgICBpZihrPT1sKXtuZXh0fQogICAgICAgIHBsb3QoZGdfdHJhaW4uc3ZkJHVbLGtdLAogICAgICAgICAgICBkZ190cmFpbi5zdmQkdVssbF0seGxhYj0iIiwKICAgICAgICAgICAgeWxhYj0iIiwKICAgICAgICAgICAgY29sPWMoIiM2NjY2NjY2NiIscmFpbmJvdyg0MSkpW2Ric2NhbjIwMCRjbHVzdGVyKzFdLHBjaD0xOSxjZXg9MC41KQogICAgfQp9CnBhcihvcCkKZGV2Lm9mZigpCmBgYAoKYGBge3J9CnBuZyhmaWxlID0gIkRCU0NBTjE3NSBwbG90cy5wbmciKQpvcDwtIHBhcihtZnJvdz1jKDIsNSkpCmZvciAoayBpbiAxOjQpewogICAgYSA9IHNlcShrKzEsNSkKICAgIGZvciAobCBpbiBhKXsKICAgICAgICBpZihrPT1sKXtuZXh0fQogICAgICAgIHBsb3QoZGdfdHJhaW4uc3ZkJHVbLGtdLAogICAgICAgICAgICBkZ190cmFpbi5zdmQkdVssbF0seGxhYj0iIiwKICAgICAgICAgICAgeWxhYj0iIiwKICAgICAgICAgICAgY29sPWMoIiM2NjY2NjY2NiIscmFpbmJvdyg0MSkpW2Ric2NhbjE3NSRjbHVzdGVyKzFdLHBjaD0xOSxjZXg9MC41KQogICAgfQp9CnBhcihvcCkKZGV2Lm9mZigpCmBgYAoKYGBge3J9CnBuZyhmaWxlID0gIkRCU0NBTjUwIHBsb3RzLnBuZyIpCm9wPC0gcGFyKG1mcm93PWMoMiw1KSkKZm9yIChrIGluIDE6NCl7CiAgICBhID0gc2VxKGsrMSw1KQogICAgZm9yIChsIGluIGEpewogICAgICAgIGlmKGs9PWwpe25leHR9CiAgICAgICAgcGxvdChkZ190cmFpbi5zdmQkdVssa10sCiAgICAgICAgICAgIGRnX3RyYWluLnN2ZCR1WyxsXSx4bGFiPSIiLAogICAgICAgICAgICB5bGFiPSIiLAogICAgICAgICAgICBjb2w9YygiIzY2NjY2NjY2IixyYWluYm93KDQxKSlbZGJzY2FuNTAkY2x1c3RlcisxXSxwY2g9MTksY2V4PTAuNSkKICAgIH0KfQpwYXIob3ApCmRldi5vZmYoKQpgYGAKCmBgYHtyfQpwbmcoZmlsZSA9ICJEQlNDQU4zMCBwbG90cy5wbmciKQpvcDwtIHBhcihtZnJvdz1jKDIsNSkpCmZvciAoayBpbiAxOjQpewogICAgYSA9IHNlcShrKzEsNSkKICAgIGZvciAobCBpbiBhKXsKICAgICAgICBpZihrPT1sKXtuZXh0fQogICAgICAgIHBsb3QoZGdfdHJhaW4uc3ZkJHVbLGtdLAogICAgICAgICAgICBkZ190cmFpbi5zdmQkdVssbF0seGxhYj0iIiwKICAgICAgICAgICAgeWxhYj0iIiwKICAgICAgICAgICAgY29sPWMoIiM2NjY2NjY2NiIscmFpbmJvdyg0MSkpW2Ric2NhbjMwJGNsdXN0ZXIrMV0scGNoPTE5LGNleD0wLjUpCiAgICB9Cn0KcGFyKG9wKQpkZXYub2ZmKCkKYGBgCgoKTGV0cyBjb21wYXJlIHRoZSBmaXJzdCBwbG90IGZvciBlYWNoIG9mIHRoZSBmb3VyIGNsdXN0ZXJpbmcncyB3ZSBwZXJmb21lZC4KCmBgYHtyfQpwbG90KGRnX3RyYWluLnN2ZCR1WywxXSwKICAgICAgICAgICAgZGdfdHJhaW4uc3ZkJHVbLDJdLHhsYWI9IiIsCiAgICAgICAgICAgIHlsYWI9IiIsIG1haW49Im1pblB0cyA9IDMwLCBDbHVzdGVycyA9IDY5IiwKICAg
ICAgICAgICAgY29sPWMoIiM2NjY2NjY2NiIscmFpbmJvdyg0MSkpW2Ric2NhbjMwJGNsdXN0ZXIrMV0scGNoPTE5LGNleD0wLjUpCnBsb3QoZGdfdHJhaW4uc3ZkJHVbLDFdLAogICAgICAgICAgICBkZ190cmFpbi5zdmQkdVssMl0seGxhYj0iIiwKICAgICAgICAgICAgeWxhYj0iIiwgbWFpbj0ibWluUHRzID0gNTAsIENsdXN0ZXJzID0gOTUiLAogICAgICAgICAgICBjb2w9YygiIzY2NjY2NjY2IixyYWluYm93KDQxKSlbZGJzY2FuNTAkY2x1c3RlcisxXSxwY2g9MTksY2V4PTAuNSkKcGxvdChkZ190cmFpbi5zdmQkdVssMV0sCiAgICAgICAgICAgIGRnX3RyYWluLnN2ZCR1WywyXSx4bGFiPSIiLAogICAgICAgICAgICB5bGFiPSIiLCBtYWluPSJtaW5QdHMgPSAxNzUsIENsdXN0ZXJzID0gMzIiLAogICAgICAgICAgICBjb2w9YygiIzY2NjY2NjY2IixyYWluYm93KDQxKSlbZGJzY2FuMTc1JGNsdXN0ZXIrMV0scGNoPTE5LGNleD0wLjUpCnBsb3QoZGdfdHJhaW4uc3ZkJHVbLDFdLAogICAgICAgICAgICBkZ190cmFpbi5zdmQkdVssMl0seGxhYj0iIiwKICAgICAgICAgICAgeWxhYj0iIiwgbWFpbj0ibWluUHRzID0gMjAwLCBDbHVzdGVycyA9IDE3IiwKICAgICAgICAgICAgY29sPWMoIiM2NjY2NjY2NiIscmFpbmJvdyg0MSkpW2Ric2NhbjIwMCRjbHVzdGVyKzFdLHBjaD0xOSxjZXg9MC41KQpwbG90KGRnX3RyYWluLnN2ZCR1WywxXSwKICAgICAgICAgICAgZGdfdHJhaW4uc3ZkJHVbLDJdLHhsYWI9IiIsCiAgICAgICAgICAgIHlsYWI9IiIsIG1haW49Im1pblB0cyA9IDQwMCwgQ2x1c3RlcnMgPSAxMyIsCiAgICAgICAgICAgIGNvbD1jKCIjNjY2NjY2NjYiLHJhaW5ib3coNDEpKVtkYnNjYW40MDAkY2x1c3RlcisxXSxwY2g9MTksY2V4PTAuNSkKYGBgCgpUaHVzIHdoZW4gY2x1c3RlcmluZyB1c2luZyBsYXJnZXIgbWluUHRzLCB3ZSBhcHBlYXIgdG8gY2x1c3RlciB0aGUgbWFqb3JpdHkgb2YgcG9pbnRzIGludG8gY2x1c3RlciAwIGkuZSB0aGUgZ3JleSBibG9jayBpbiB0aGUgZmlndXJlcy4gV2UgZ2V0IGEgbWVyZ2luZyBvZiBjbHVzdGVycyBiZXR3ZWVuIDMwIG1pbiBwb2ludHMgYW5kIDIwMCBtaW4gcG9pbnRzLiBXaGVuIHBlcmZvcm1pbmcgbWVhbiBpbXB1dGF0aW9uLCB3ZSBjYW4gdGh1cyBlaXRoZXIgd29yayB3aXRoIGEgbGFyZ2UgYW1vdW50IG9mIGNsdXN0ZXJzIGkuZS4gd2hlbiB0aGUgbWluUHRzIGlzIHNtYWxsIH4zMCBvciBmZXdlciBjbHVzdGVycyBidXQgaGF2ZSB0aGUgbWFqb3JpdHkgb2YgcG9pbnRzIGluIGEgc2luZ2xlIGNsdXN0ZXIgaS5lLiB3aGVuIHRoZSBtaW5QdHMgaXMgbGFyZ2UgfjE3NS4KCiMjIEltcHV0YXRpb24KCldlJ2xsIHVzZSB0aGUgY2x1c3RlcmluZyB3aXRoIDIwMCBtaW4gcG9pbnRzLiBUaGlzIGFsbG93cyB1cyB0byBrZWVwIGNsb3NlIHRvIHRoZSB3YXkgdGhhdCBBbGV4IGhhcyBkb25lIGl0IHdpdGggMTUgY2x1c3RlcnMgYW5kIGVuc3VyZXMgdGhhdCB3ZSdyZSBsaWtlbHkgZW5vdWdo
IHRvIGhhdmUgZGF0YSBpbiBlYWNoIGNsdXN0ZXIgdG8gYWxsb3cgdXMgdG8gaW1wdXRlIG1pc3NpbmduZXNzLgoKYGBge3J9CmRic2NhbjIwMApgYGAKCgpgYGB7cn0KZGdfdHJhaW4uY2x1c3RlcmVkIDwtIGRhdGEuZnJhbWUoZGdfdHJhaW4pCgpkZ190cmFpbi5jbHVzdGVyZWQkY2x1c3RlciA8LSBkYnNjYW4yMDAkY2x1c3RlcgoKZGdfdHJhaW4uY2x1c3RlcmVkCmBgYAoKYGBge3J9CmRnX3RyYWluX21pc3NpbmcuY2x1c3RlcmVkIDwtIGRhdGEuZnJhbWUoZGdfdHJhaW5fbWlzc2luZykKCmRnX3RyYWluX21pc3NpbmcuY2x1c3RlcmVkJGNsdXN0ZXIgPC0gZGJzY2FuMjAwJGNsdXN0ZXIKCmRnX3RyYWluX21pc3NpbmcuY2x1c3RlcmVkCmBgYAoKV2UgbmVlZCB0byBjaGVjayB0byBzZWUgaWYgd2UgY2FuIHBlcmZvcm0gaW1wdXRhdGlvbi4gSWYgYWxsIHRoZSB2YWx1ZXMgaW4gYSBjbHVzdGVyIGhhdmUgbi9hIHRoZW4gd2Ugd29udCBiZSBhYmxlIHRvIHBlcmZvcm0gdGhlIGltcHV0YXRpb24gYW5kIHRoZXJlZm9yZSBtYXkgbmVlZCB0byBjb25zaWRlciBjaGFuZ2luZyB0aGUgY2x1c3RlcmluZy4KCmBgYHtyfQpmb3IoaSBpbiAwOjE2KXsKICBhIDwtIGRnX3RyYWluX21pc3NpbmcuY2x1c3RlcmVkW2RnX3RyYWluX21pc3NpbmcuY2x1c3RlcmVkJGNsdXN0ZXIgPT0gaSxdCgogIGIgPC0gY29sU3Vtcyhpcy5uYShhKSkvbnJvdyhhKQogIAogIGlmKGJbImR1cmF0aW9uIl0gPT0gMSl7CiAgICBwcmludChwYXN0ZTAoIkNsdXN0ZXIgIiwgaSwgIiBoYXMgbm8gbm9uIG5hIHZhbHVlKHMpIikpCiAgfQp9CmBgYAoKV2Ugc2VlIGhlcmUgdGhhdCBhbGwgYnV0IDEgY2x1c3RlciBoYXMgdmFsdWVzIHRoYXQgYWxsb3cgdXMgdG8gaW1wdXRlLiBDbHVzdGVyIDQgaGFzIGFsbCBuL2EgdmFsdWVzIGFuZCB0aHVzIHdlIGNhbnQgdXNlIG1lYW4gaW1wdXRhdGlvbiB0byBmaWd1cmUgb3V0IHdoYXQgdGhlc2UgdmFsdWVzIHNob3VsZCBiZS4gV2UnbGwgY29uc2lkZXIgb3RoZXIgd2F5cyBvZiBpbXB1dGluZyBzb2xlbHkgZm9yIHRoaXMgY2x1c3RlciBhZnRlciB3ZSd2ZSBpbXB1dGVkIGZvciB0aGUgb3RoZXIgY2x1c3RlcnMuIE5vdGUgdGhhdCBub25lIG9mIHRoZSBvdGhlciB0ZXN0ZWQgY2x1c3RlcnMgcmVzdWx0IGluIGJldHRlciBvcHRpb25zLiBBbGwgdGhlIG90aGVyIGNsdXN0ZXJpbmcncyByZXN1bHQgaW4gbW9yZSBjbHVzdGVycyB3aXRoIG5vIHZhbHVlcyBlLmcuIGRic2NhbjQwMCBoYXMgMiBjbHVzdGVycyB3aXRoIGZ1bGwgbWlzc2luZ25lc3MgYW5kIGRic2NhbjMwIGhhcyAyMiBjbHVzdGVycyB3aXRoIGZ1bGwgbWlzc2luZ25lc3MuCgpgYGB7cn0KZm9yKGkgaW4gMDoxNil7CiAgYXNzaWduKHBhc3RlMCgiY2x1c3RlciIsaSksIGRnX3RyYWluX21pc3NpbmcuY2x1c3RlcmVkW2RnX3RyYWluX21pc3NpbmcuY2x1c3RlcmVkJGNsdXN0ZXIgPT0gaSxdKQp9CmBgYAoKYGBge3J9CmNsdXN0ZXJzIDwtIGMoY2x1c3RlcjAsY2x1c3RlcjEs
Y2x1c3RlcjIsY2x1c3RlcjMsY2x1c3RlcjQsY2x1c3RlcjUsY2x1c3RlcjYsY2x1c3RlcjcsY2x1c3RlcjgsY2x1c3RlcjksY2x1c3RlcjEwLGNsdXN0ZXIxMSxjbHVzdGVyMTIsY2x1c3RlcjEzLGNsdXN0ZXIxNCxjbHVzdGVyMTUsY2x1c3RlcjE2KQpgYGAKCldlJ2xsIHBsb3QgdGhlIGZpcnN0IGNsdXN0ZXIgaW4gYSBib3ggcGxvdCB0byB2aXN1YWxpc2Ugb3V0bGllcnMgYW5kIGFsc28gYXMgYSBjb21wYXJpc29uIGZvciBsYXRlci4KYGBge3J9Cm1lbHREYXRhIDwtIG1lbHQoY2x1c3RlcjApCnAgPC0gZ2dwbG90KG1lbHREYXRhLCBhZXMoZmFjdG9yKHZhcmlhYmxlKSwgdmFsdWUpKSAKZ2d0aXRsZShjKCJDbHVzdGVyOiAwIikpCnAgKyBnZW9tX2JveHBsb3QoKSArIGZhY2V0X3dyYXAofnZhcmlhYmxlLCBzY2FsZT0iZnJlZSIpCmBgYAoKYGBge3J9CgpkbWVhbnMgPSBjKCkKb2JtZWFucyA9IGMoKQpyYm1lYW5zID0gYygpCgpmb3IoaSBpbiAxOjE3KXsKICBhIDwtIGFzLmRhdGEuZnJhbWUoYyhjbHVzdGVyc1s0KmktM10sY2x1c3RlcnNbNCppLTJdLGNsdXN0ZXJzWzQqaS0xXSxjbHVzdGVyc1s0KmldKSkKICBtIDwtIGNvbE1lYW5zKGEsbmEucm0gPSBUUlVFKQogIHByaW50KHBhc3RlMCgiQ3VycmVudGx5IHdvcmtpbmcgb24gY2x1c3RlciAiLGktMSwgIi4iKSkKICAKICBkbWVhbnMgPSBjKGRtZWFucyxtWzFdKQogIG9ibWVhbnMgPSBjKG9ibWVhbnMsbVsyXSkKICByYm1lYW5zID0gYyhyYm1lYW5zLG1bM10pCiAgCiAgZm9yKGsgaW4gKDE6MykpewogICAgZm9yKGogaW4gKDE6bnJvdyhhKSkpewogICAgICBpZihpcy5uYShhW2osa10pKXsKICAgICAgICBhW2osa10gPSBtW2tdCiAgICAgICAgCiAgICAgIH0KICAgIH0KICB9CiAgIGFzc2lnbihwYXN0ZTAoImNsdXN0ZXIiLGktMSksYSkKfQpgYGAKCldlJ2xsIGZpbmFsbHkgZ2V0IHRoZSB0YWJsZSBvZiBtZWFucyB0aGF0IHdlIHdhbnRlZC4gVGhpcyBnaXZlcyB1cyB0aGUgbWVhbiBvZiBlYWNoIG1pc3NpbmcgY29sdW1uIGFuZCB0aGUgY2x1c3RlciB0aGV5J3JlIGZyb20uCgpgYGB7cn0KbWVhbnMgPC0gZGF0YS5mcmFtZSgiY2x1c3RlciIgPSBzZXEoMCwxNiksICJkdXJhdGlvbiBtZWFucyI9IGRtZWFucywgIm9yaWdpbl9ieXRlcyBtZWFucyIgPSBvYm1lYW5zLCAicmVzcF9ieXRlcyBtZWFucyAiID0gcmJtZWFucykKYGBgCgpgYGB7cn0KcGRmKCJtZWFucy5wZGYiLCBoZWlnaHQ9MTEsIHdpZHRoPTEwKQpncmlkLnRhYmxlKG1lYW5zKQpkZXYub2ZmKCkKCm1lYW5zCmBgYAoKRmluYWxseSwgd2UnbGwgdGVzdCB0byBzZWUgaG93IHRoaXMgaW1wdXRhdGlvbiBoYXMgd29ya2VkLiBXZSdsbCBsb29rIGF0IHRoZSBlcnJvciBpLmUuIHRoZSBkaWZmZXJlbmNlIGJldHdlZW4gdGhlIG1lYW5zIHByb2R1Y2VkIGZyb20gdGhlIGNsdXN0ZXJlZCB0cmFpbmluZyBkYXRhIGFuZCB0aGUgdHJhaW5pbmcgZGF0YSB3ZSdsbCBjbHVzdGVyIG5vdy4gV2UnbGwgdXNlIHRoZSBzYW1lIHBh
cmFtZXRlcnMgYXMgZGVmaW5lZCBhYm92ZSB0byBtYWludGFpbiBjb25zaXN0ZW5jeSAtIGlmIHdlIHdlcmUgdG8gY2hlY2sgdGhlc2UgcGFyYW1ldGVycywgd2Ugc2hvdWxkIHNlZSBzaW1pbGFyIG9uZXMgc2luY2UgdGhleSBhcmUgYm90aCByYW5kb20gc2FtcGxlcyBvZiB0aGUgZGF0YS4KCmBgYHtyfQpkZ190ZXN0LnN2ZCA8LSBzdmQoZGdfdGVzdCkKYGBgCgpgYGB7cn0KaT0xO2o9MgpwbG90KGRnX3Rlc3Quc3ZkJHVbLGldLAogICAgIGRnX3Rlc3Quc3ZkJHVbLGpdLHR5cGU9InAiLAogICAgIGNvbD0iIzMzMzMzMzExIixwY2g9MTYsY2V4PTEpCmBgYAoKYGBge3J9CmRic2NhbjIwMFRlc3QgPSBkYnNjYW4oZGdfdGVzdC5zdmQkdVssMTpucGNzXSxlcHMgPSAwLjAwMDEsbWluUHRzID0gMjAwKQpgYGAKCmBgYHtyfQpkYnNjYW4yMDBUZXN0CmBgYAoKClNvIHdoYXQgd2UgZmluZCBpcyB0aGF0IHRoZSBjbHVzdGVyaW5nIGZvciB0aGUgdGVzdCBzcGxpdCBwdXRzIGFsbCAyMDAwMCBkYXRhIHBvaW50cyBpbnRvIHRoZSBmaXJzdCBjbHVzdGVyLiBXZSdsbCBoYXZlIGEgbG9vayBhdCB3aGF0IHJlc3VsdCB0aGlzIGdpdmVzIGJ1dCB0aGlzIHVsdGltYXRlbHkgbG9va3MgbGlrZSBpdCB3b250IHJlc3VsdCBpbiBhbnkgZnJ1aXRmdWwgY29tcGFyaXNvbiB0byBzZWUgaG93IHdlbGwgREJTQ0FOIHBlcmZvcm1lZC4KCmBgYHtyfQpkZ190ZXN0LmNsdXN0ZXJlZCA8LSBkYXRhLmZyYW1lKGRnX3Rlc3QpCgpkZ190ZXN0LmNsdXN0ZXJlZCRjbHVzdGVyIDwtIGRic2NhbjIwMFRlc3QkY2x1c3RlcgoKZGdfdGVzdC5jbHVzdGVyZWQKYGBgCgpgYGB7cn0KZGdfdGVzdF9taXNzaW5nLmNsdXN0ZXJlZCA8LSBkYXRhLmZyYW1lKGRnX3Rlc3RfbWlzc2luZykKCmRnX3Rlc3RfbWlzc2luZy5jbHVzdGVyZWQkY2x1c3RlciA8LSBkYnNjYW4yMDBUZXN0JGNsdXN0ZXIKCmRnX3Rlc3RfbWlzc2luZy5jbHVzdGVyZWQKYGBgCgpgYGB7cn0KY2x1c3RlcjB0ZXN0bWVhbnMgPC0gYXMuZGF0YS5mcmFtZShjb2xNZWFucyhkZ190ZXN0X21pc3NpbmcuY2x1c3RlcmVkLCBuYS5ybSA9IFRSVUUpKQoKY2x1c3RlcjB0ZXN0bWVhbnMKCmBgYAoKYGBge3J9CmNsdXN0ZXIwdHJhaW5tZWFucyA8LSBtZWFuc1sxLF0KYGBgCgoKYGBge3J9CmRpZmZtZWFucyA9IGMoKQoKZm9yKGkgaW4gMjo0KXsKICB0cmFpbm0gPC0gY2x1c3RlcjB0cmFpbm1lYW5zW2ldCiAgdGVzdG0gPC0gY2x1c3RlcjB0ZXN0bWVhbnNbaS0xLF0KICBkaWZmIDwtIDEgLSAodGVzdG0vdHJhaW5tKQogIGRpZmZtZWFucyA8LSBjKGRpZmZtZWFucywgZGlmZikKfQpgYGAKCmBgYHtyfQphcy5kYXRhLmZyYW1lKGRpZmZtZWFucykKYGBgCgpUaHVzIHdlIGhhdmUgYSB2ZXJ5IGxhcmdlIGRpZmZlcmVuY2UgaW4gdGhlIG1lYW5zIG9mIG91ciB0cmFpbmluZyBkYXRhIGFuZCB0aGUgbWVhbnMgb2YgdGVzdCBkYXRhIGFuZCB0aHVzIHdlIG1heSBhc3N1bWUgdGhhdCBEQlNDQU4gaW4gdGhpcyBjYXNlIGRvZXNu
J3QgcGVyZm9ybSB2ZXJ5IHdlbGwuCgpGaW5hbGx5LCB3ZSdyZSBnb2luZyB0byB2aXN1YWxpc2UgdGhlIGRhdGEgdXNpbmcgdC1TTkUgcHJvamVjdGlvbi4gVGhlIHBsb3RzIGFib3ZlIGhlbHAgdXMgdW5kZXJzdGFuZCB0aGUgZGF0YSBidXQgYXJlIGhhcmQgdG8gaW5mZXIgYW55dGhpbmcgZnJvbS4gV2UnbGwgdmlzdWFsaXNlIHRoZSBEQlNDQU4yMDAgZGF0YSBiZWxvdy4KCmBgYHtyfQpydHNuZV9vdXQgPC0gUnRzbmUoYXMubWF0cml4KGRnX3RyYWluLmNsdXN0ZXJlZCksIHBjYSA9IEZBTFNFLCB2ZXJib3NlID0gVFJVRSwgY2hlY2tfZHVwbGljYXRlcyA9IEZBTFNFKQpgYGAKCmBgYHtyfQpwbG90KHJ0c25lX291dCRZLCBhc3AgPSAxLCBwY2ggPSAyMCwgCiAgICAgY2V4ID0gMC4xLCBjZXguYXhpcyA9IDEuMjUsIGNleC5sYWIgPSAxLjI1LCBjZXgubWFpbiA9IDEuNSwgCiAgICAgeGxhYiA9ICJ0LVNORSBkaW1lbnNpb24gMSIsIHlsYWIgPSAidC1TTkUgZGltZW5zaW9uIDIiLCAKICAgICBtYWluID0gIjJEIHQtU05FIHByb2plY3Rpb24iLGNvbD1jKCIjNjY2NjY2NjYiLHJhaW5ib3coNDEpKVtkYnNjYW4yMDAkY2x1c3RlcisxXSkKYGBgCgoKV2UnbGwgYWxzbyBsb29rIGF0IGEgcGxvdCB1c2luZyB1bWFwLgoKYGBge3J9CmRhdGEudW1hcCA8LSB1bWFwKGRnX3RyYWluLmNsdXN0ZXJlZCwgaW5pdD0ic3BlY3RyYWwiKQpgYGAKCmBgYHtyfQpwbG90KGRhdGEudW1hcCwgYXNwID0gMSwgcGNoID0gMjAsIAogICAgIGNleCA9IDAuMiwgY2V4LmF4aXMgPSAxLjI1LCBjZXgubGFiID0gMS4yNSwgY2V4Lm1haW4gPSAxLjUsIAogICAgIG1haW4gPSAiMkQgdW1hcCBwcm9qZWN0aW9uIixjb2w9YygiIzY2NjY2NjY2IixyYWluYm93KDQxKSlbZGJzY2FuMjAwJGNsdXN0ZXIrMV0pCmBgYAoKVGhlIGRpZmZlcmVuY2UgaXMgc3RhcnRsaW5nLiBXaGVyZWFzIHRoZSB0c25lIHBsb3QgbG9va3MgZmFpcmx5IGp1bWJsZWQgd2l0aCBjbHVzdGVycywgd2l0aCBubyBjbHVzdGVycyBhY3R1YWxseSBzZWVtaW5nIHRvIGFwcGVhciBhbmQgbW9yZSBzY2F0dGVyaW5nIHdpdGhpbiBpdCwgdGhlIHVtYXAgcGxvdCBoYXMgdmVyeSBkaXNjcmV0ZSBjbHVzdGVycyBhbmQgZ2l2ZXMgYSBtdWNoIGJldHRlciB2aXN1YWxpc2F0aW9uLiBXZSBnZXQgc29tZSBzY2F0dGVyaW5nIGJldHdlZW4gY2x1c3RlcnMgd2l0aCBncmV5L3JlZCBwb2ludHMgb2NjYXNpb25hbGx5IHNob3dpbmcgdXAgd2hlcmUgd2UgZG9uJ3QgbmVjZXNzYXJpbHkgZXhwZWN0IHRoZW0gYnV0IG92ZXJhbGwgdGhlIGNsdXN0ZXJzIGxvb2sgdmVyeSBpbmRlcGVuZGVudC4gV2l0aCB0aGlzIGluIG1pbmQsIEkgd291bGQgcHJlc3VtZSB0aGF0IHRoZSBjbHVzdGVyaW5nIHdpdGggYSBtaW5pbXVtIHBvaW50cyBvZiAyMDAgZG9lcyBwcm9kdWNlIHZhbGlkIGNsdXN0ZXJzIGFuZCBpcyBhIGdvb2Qgd2F5IHRvIHBlcmZvcm0gaW1wdXRhdGlvbiBiYXNlZCBvbiBjbHVzdGVycywgZGVzcGl0ZSBzb21l
IG9mIHRoZSBlYXJsaWVyIGlzc3VlcyB0aGF0IG1heSBzdGlsbCBiZSB2YWxpZC4gQWRkaXRpb25hbGx5LCB0aGUgdW1hcCBwcm9qZWN0aW9uIGlzIGluY3JlZGlibHkgZmFzdCBjb21wYXJlZCB0byB0aGUgdHNuZSBwcm9qZWN0aW9uIGFuZCB0aGVyZWZvcmUgaXMgY29tcHV0YXRpb25hbGx5IG1vcmUgdXNlZnVsLgoKIyMgTSBtYXRyaXggLSBOb3QgdXNlZAoKKipUaGUgYmVsb3cgY29kZSBpcyBleHBsYWluZWQgYW5kIHJ1biB0aHJvdWdoIGJ1dCBpc24ndCB1c2VkIGluIHRoZSBmaW5hbCByZXBvcnQuIEl0IHNlcnZlZCBhcyBhIGdvb2QgcGllY2Ugb2Ygd29yayB0byBoZWxwIG1lIGRldmxvcCBteSBza2lsbHMgd2l0aCBEQlNDQU4gc28gd2lsbCBiZSBsZWZ0IGluIGJ1dCBpc24ndCBwYXJ0aWN1bGFybHkgaW5zaWdodGZ1bC4qKgoKTm93IHdlIGxvb2sgYXQgdGhlIE0gbWF0cml4IHByb2R1Y2VkIGJ5IEFsZXggdGhhdCBpcyBhIHNwYXJzZSBtYXRyaXggc2hvd2luZyBjb25uZWN0aW9ucyBiZXR3ZWVuIG9yaWdpbiBJUCdzIGFuZCByZXNwb25zZSBJUCdzLgoKYGBge3J9ClNvMSA8LSB0YXBwbHkobXlkYXRhJGlkLm9yaWdfaCwgbXlkYXRhJGlkLm9yaWdfaCkKRGUxIDwtIHRhcHBseShteWRhdGEkaWQucmVzcF9oLCBteWRhdGEkaWQucmVzcF9oKQpFc3QgPC0gYXMubWF0cml4KGNiaW5kKFNvMSwgRGUxKSkKTTwtIHNwYXJzZU1hdHJpeChpPUVzdFssMV0sIGo9RXN0WywyXSkKYGBgCgpgYGB7cn0KTS5zdmQgPSBzdmQoTSkKYGBgCgpgYGB7cn0KcGxvdChNLnN2ZCRkLHhsYWI9IkVpZ2VudmFsdWUgaW5kZXgiLHlsYWI9IkVpZ2VudmFsdWUiLGxvZz0ieSIpCnBsb3QoTS5zdmQkZCx4bGFiPSJFaWdlbnZhbHVlIGluZGV4Iix5bGFiPSJFaWdlbnZhbHVlIikKYGBgCgpGcm9tIHRoZSBsb2cgYXhpcyBpdCBsb29rcyBsaWtlIHdlIG5lZWQgdGhlIGZpcnN0IH4xMDAgZWlnZW52YWx1ZXMgYnV0IHVzaW5nIHRoZSBub3JtYWwgcGxvdCwgaXQgbG9va3MgbGlrZSB3ZSBhbiBnZXQgYXdheSB3aXRoIHVzaW5nIH4zMC4KCmBgYHtyfQpucGNzTSA9IDMwCmBgYAoKYGBge3J9CnRlc3RNPWtOTmRpc3QoTS5zdmQkdVssMTpucGNzTV0sIGsgPSA3LGFsbD1UUlVFKQp0ZXN0bWluTT1hcHBseSh0ZXN0TSwxLG1pbikKYGBgCgpgYGB7cn0KcGxvdChzb3J0KHRlc3RtaW5NW3Rlc3RtaW5NPjFlLTE1XSksbG9nPSJ5IikKdGhyZXNoaG9sZHM9IGMoMC4xLDAuMDEsMC4wMDEsMC4wMDAxLDAuMDAwMDEsMC4wMDAwMDEpCmFibGluZShoPWMoMC4wMSwwLjAwMSwwLjAwMDEsMC4wMDAwMSwwLjAwMDAwMSkpCmFibGluZShoPTAuNSwgY29sPSJyZWQiKQpgYGAKCkl0IGxvb2tzIGxpa2Ugd2Ugd2FudCBlcHMgPSAwLjUgYWx0aG91Z2ggd2UgZG9udCBzZWVtIHRvIGdldCBhIGtuZWUgaW4gdGhlIGRhdGEgc28gaXQncyBoYXJkIHRvIHBpbnBvaW50IHRoaXMuCgpgYGB7cn0KZGJzY2FucmVzTSA9IGRic2NhbihNLnN2ZCR1WywxOm5wY3NNXSxlcHMgPSAwLjUpCmBgYAoK
V2UncmUganVzdCBnb2luZyB0byBsb29rIGF0IHRoZSBmaXJzdCA1IHBjYSdzIHNpbmNlIHRoaXMgaXMgYXdmdWwgdG8gbG9vayBhdCBpZiB3ZSB1c2UgYWxsIDMwLgoKYGBge3J9CmZvciAoayBpbiAxOjQpewogICAgYSA9IHNlcShrKzEsNSkKICAgIGZvciAobCBpbiBhKXsKICAgICAgICBpZihrPT1sKXtuZXh0fQogICAgICAgIHBsb3QoTS5zdmQkdVssa10sCiAgICAgICAgICAgIE0uc3ZkJHVbLGxdLHhsYWI9IiIsCiAgICAgICAgICAgIHlsYWI9IiIsCiAgICAgICAgICAgIGNvbD1jKCIjNjY2NjY2NjYiLHJhaW5ib3coNDEpKVtkYnNjYW5yZXNNJGNsdXN0ZXIrMV0scGNoPTE5LGNleD0wLjUpCiAgICB9Cn0KYGBgCgpSZWZlcmVuY2VzOgoKMS4gW0RhdGEgZnJvbSBTZWNSZXBvXShodHRwczovL3d3dy5zZWNyZXBvLmNvbSkKCjIuIFtDb252ZXJ0aW5nIGNhdGVnb3JpY2FsIHZhcmlhYmxlc10oaHR0cHM6Ly9zdGFja292ZXJmbG93LmNvbS9xdWVzdGlvbnMvNDc5MjIxODQvY29udmVydC1jYXRlZ29yaWNhbC12YXJpYWJsZXMtdG8tbnVtZXJpYy1pbi1yLzQ3OTIzMTc4KQoKMy4gW0FkZGluZyBjb2x1bW5zIHRvIGRhdGEgZnJhbWVzXShodHRwczovL2Rpc2N1c3MuYW5hbHl0aWNzdmlkaHlhLmNvbS90L2hvdy10by1hZGQtYS1jb2x1bW4tdG8tYS1kYXRhLWZyYW1lLWluLXIvMzI3OCkKCjQuIFtGaW5kaW5nIFVuaXF1ZSBWYWx1ZXNdKGh0dHBzOi8vc3RhY2tvdmVyZmxvdy5jb20vcXVlc3Rpb25zLzQxOTA2ODc4L3ItbnVtYmVyLW9mLXVuaXF1ZS12YWx1ZXMtaW4tYS1jb2x1bW4tb2YtZGF0YS1mcmFtZSkKCjUuIFtEQlNDQU4gb24gZmxvd2Vyc10oaHR0cHM6Ly93d3cuZ2Vla3Nmb3JnZWVrcy5vcmcvZGJzY2FuLWNsdXN0ZXJpbmctaW4tci1wcm9ncmFtbWluZy8pCgo2LiBbU2F2aW5nIFBsb3RzIChjcmVkaXQgbXVzdCBhbHNvIGJlIGdpdmVuIHRvIEFsZXggZm9yIGhlbHBpbmcgbWUgb3V0IGEgaHVnZSBhbW91bnQgaGVyZSldKGh0dHA6Ly93d3cuc3RoZGEuY29tL2VuZ2xpc2gvd2lraS9jcmVhdGluZy1hbmQtc2F2aW5nLWdyYXBocy1yLWJhc2UtZ3JhcGhzKQoKNy4gW0RCU0NBTiBQYXJhbWV0ZXIgRXN0aW1hdGlvbl0oaHR0cHM6Ly9lbi53aWtpcGVkaWEub3JnL3dpa2kvREJTQ0FOI1BhcmFtZXRlcl9lc3RpbWF0aW9uKQoKOC4gW0ZpbmRpbmcgdGhlIGtuZWUgaW4ga05ORGlzdF0oaHR0cHM6Ly93d3cucmRvY3VtZW50YXRpb24ub3JnL3BhY2thZ2VzL2Ric2Nhbi92ZXJzaW9ucy8xLjEtNS90b3BpY3Mva05OZGlzdCkKCjkuIFtTaWxob3VldHRlIFNjb3JlIGludHJvZHVjdGlvbl0oaHR0cHM6Ly9tZWRpdW0uY29tL2NvZGVzbWFydC9yLXNlcmllcy1rLW1lYW5zLWNsdXN0ZXJpbmctc2lsaG91ZXR0ZS03OTQ3NzRiNDY1ODYpCgoxMC4gW0Vycm9yIHdpdGggc2lsaG91ZXR0ZSBzY29yZV0oaHR0cHM6Ly9zdGFja292ZXJmbG93LmNvbS9xdWVzdGlvbnMvNTEyNDgyOTMvZXJyb3ItdmVjdG9yLW1lbW9yeS1leGhhdXN0ZWQtbGltaXQt
cmVhY2hlZC1yLTMtNS0wLW1hY29zKQoKMTEuIFtTaWxob3VldHRlIEZ1bmN0aW9uXShodHRwczovL3d3dy5yZG9jdW1lbnRhdGlvbi5vcmcvcGFja2FnZXMvY2x1c3Rlci92ZXJzaW9ucy8yLjEuMC90b3BpY3Mvc2lsaG91ZXR0ZSkKCjEyLiBbQXNzaWduIGZ1bmN0aW9uIGZvciBjcmVhdGluZyBtdWx0aXBsZSBkYXRhIGZyYW1lcyBhdCBvbmNlXShodHRwczovL3N0YWNrb3ZlcmZsb3cuY29tL3F1ZXN0aW9ucy80NDU3NTExMC9mb3ItbG9vcC1mb3ItY3JlYXRpbmctbXVsdGlwbGUtZGF0YS1mcmFtZXMtYW5kLWFzc2lnbmluZy12YWx1ZXMpCgoxMy4gW0V4cG9ydGluZyBhIGRhdGEgZnJhbWUgYXMgYSBwZGZdKGh0dHBzOi8vc3RhY2tvdmVyZmxvdy5jb20vcXVlc3Rpb25zLzQyODYwNzE2L2V4cG9ydC1kYXRhZnJhbWUtdG8tcGRmLXBuZy1pbi1yKQoKMTQuIFtQbG90dGluZyBtdWx0aXBsZSBib3ggcGxvdHMgdXNpbmcgZ2dwcGxvdF0oaHR0cHM6Ly9zdGFja292ZXJmbG93LmNvbS9xdWVzdGlvbnMvMTEzNDY4ODAvci1wbG90LW11bHRpcGxlLWJveC1wbG90cy11c2luZy1jb2x1bW5zLWZyb20tZGF0YS1mcmFtZSkKCjE1LiBbVXNpbmcgdGhlIHV3b3QgcGFja2FnZV0oaHR0cHM6Ly93d3cucmRvY3VtZW50YXRpb24ub3JnL3BhY2thZ2VzL3V3b3QvdmVyc2lvbnMvMC4wLjAuOTAwOSk=